1/* $NetBSD: softfloat.c,v 1.15 2024/02/27 15:14:15 christos Exp $ */
2
3/*
4 * This version hacked for use with gcc -msoft-float by bjh21.
5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6 *  itself).
7 */
8
9/*
10 * Things you may want to define:
11 *
12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14 *   properly renamed.
15 */
16
17/*
18===============================================================================
19
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
22
23Written by John R. Hauser.  This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704.  Funding was partially provided by the
26National Science Foundation under grant MIP-9311980.  The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31arithmetic/SoftFloat.html'.
32
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39Derivative works are acceptable, even for commercial purposes, so long as
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
43
44===============================================================================
45*/
46
47#include <sys/cdefs.h>
48#if defined(LIBC_SCCS) && !defined(lint)
49__RCSID("$NetBSD: softfloat.c,v 1.15 2024/02/27 15:14:15 christos Exp $");
50#endif /* LIBC_SCCS and not lint */
51
52#ifdef SOFTFLOAT_FOR_GCC
53#include "softfloat-for-gcc.h"
54#endif
55
56#include "milieu.h"
57#include "softfloat.h"
58
59/*
60 * Conversions between floats as stored in memory and floats as
61 * SoftFloat uses them
62 */
63#ifndef FLOAT64_DEMANGLE
64#define FLOAT64_DEMANGLE(a)	(a)
65#endif
66#ifndef FLOAT64_MANGLE
67#define FLOAT64_MANGLE(a)	(a)
68#endif
69
70/*
71-------------------------------------------------------------------------------
72Floating-point rounding mode, extended double-precision rounding precision,
73and exception flags.
74-------------------------------------------------------------------------------
75*/
76#ifndef set_float_rounding_mode
77fp_rnd float_rounding_mode = float_round_nearest_even;
78fp_except float_exception_flags = 0;
79#endif
80#ifndef set_float_exception_inexact_flag
81#define	set_float_exception_inexact_flag() \
82	((void)(float_exception_flags |= float_flag_inexact))
83#endif
84#ifdef FLOATX80
85int8 floatx80_rounding_precision = 80;
86#endif
87
88/*
89-------------------------------------------------------------------------------
90Primitive arithmetic functions, including multi-word arithmetic, and
91division and square root approximations.  (Can be specialized to target if
92desired.)
93-------------------------------------------------------------------------------
94*/
95#include "softfloat-macros"
96
97/*
98-------------------------------------------------------------------------------
99Functions and definitions to determine:  (1) whether tininess for underflow
100is detected before or after rounding by default, (2) what (if anything)
101happens when exceptions are raised, (3) how signaling NaNs are distinguished
102from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
103are propagated from function inputs to output.  These details are target-
104specific.
105-------------------------------------------------------------------------------
106*/
107#include "softfloat-specialize"
108
109#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
110/*
111-------------------------------------------------------------------------------
112Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
113and 7, and returns the properly rounded 32-bit integer corresponding to the
114input.  If `zSign' is 1, the input is negated before being converted to an
115integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
116is simply rounded to an integer, with the inexact exception raised if the
117input cannot be represented exactly as an integer.  However, if the fixed-
118point input is too large, the invalid exception is raised and the largest
119positive or negative integer is returned.
120-------------------------------------------------------------------------------
121*/
122static int32 roundAndPackInt32( flag zSign, bits64 absZ )
123{
124    int8 roundingMode;
125    flag roundNearestEven;
126    int8 roundIncrement, roundBits;
127    int32 z;
128
129    roundingMode = float_rounding_mode;
130    roundNearestEven = ( roundingMode == float_round_nearest_even );
131    roundIncrement = 0x40;
132    if ( ! roundNearestEven ) {
133        if ( roundingMode == float_round_to_zero ) {
134            roundIncrement = 0;
135        }
136        else {
137            roundIncrement = 0x7F;
138            if ( zSign ) {
139                if ( roundingMode == float_round_up ) roundIncrement = 0;
140            }
141            else {
142                if ( roundingMode == float_round_down ) roundIncrement = 0;
143            }
144        }
145    }
146    roundBits = (int8)(absZ & 0x7F);
147    absZ = ( absZ + roundIncrement )>>7;
148    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
149    z = (int32)absZ;
150    if ( zSign ) z = - z;
151    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
152        float_raise( float_flag_invalid );
153        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
154    }
155    if ( roundBits ) set_float_exception_inexact_flag();
156    return z;
157
158}
159
160/*
161-------------------------------------------------------------------------------
162Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
163`absZ1', with binary point between bits 63 and 64 (between the input words),
164and returns the properly rounded 64-bit integer corresponding to the input.
165If `zSign' is 1, the input is negated before being converted to an integer.
166Ordinarily, the fixed-point input is simply rounded to an integer, with
167the inexact exception raised if the input cannot be represented exactly as
168an integer.  However, if the fixed-point input is too large, the invalid
169exception is raised and the largest positive or negative integer is
170returned.
171-------------------------------------------------------------------------------
172*/
173static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
174{
175    int8 roundingMode;
176    flag roundNearestEven, increment;
177    int64 z;
178
179    roundingMode = float_rounding_mode;
180    roundNearestEven = ( roundingMode == float_round_nearest_even );
181    increment = ( (sbits64) absZ1 < 0 );
182    if ( ! roundNearestEven ) {
183        if ( roundingMode == float_round_to_zero ) {
184            increment = 0;
185        }
186        else {
187            if ( zSign ) {
188                increment = ( roundingMode == float_round_down ) && absZ1;
189            }
190            else {
191                increment = ( roundingMode == float_round_up ) && absZ1;
192            }
193        }
194    }
195    if ( increment ) {
196        ++absZ0;
197        if ( absZ0 == 0 ) goto overflow;
198        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
199    }
200    z = absZ0;
201    if ( zSign ) z = - z;
202    if ( z && ( ( z < 0 ) ^ zSign ) ) {
203 overflow:
204        float_raise( float_flag_invalid );
205        return
206              zSign ? (sbits64) LIT64( 0x8000000000000000 )
207            : LIT64( 0x7FFFFFFFFFFFFFFF );
208    }
209    if ( absZ1 ) set_float_exception_inexact_flag();
210    return z;
211
212}
213#endif
214
215/*
216-------------------------------------------------------------------------------
217Returns the fraction bits of the single-precision floating-point value `a'.
218-------------------------------------------------------------------------------
219*/
220INLINE bits32 extractFloat32Frac( float32 a )
221{
222
223    return a & 0x007FFFFF;
224
225}
226
227/*
228-------------------------------------------------------------------------------
229Returns the exponent bits of the single-precision floating-point value `a'.
230-------------------------------------------------------------------------------
231*/
232INLINE int16 extractFloat32Exp( float32 a )
233{
234
235    return ( a>>23 ) & 0xFF;
236
237}
238
239/*
240-------------------------------------------------------------------------------
241Returns the sign bit of the single-precision floating-point value `a'.
242-------------------------------------------------------------------------------
243*/
244INLINE flag extractFloat32Sign( float32 a )
245{
246
247    return a>>31;
248
249}
250
251/*
252-------------------------------------------------------------------------------
253Normalizes the subnormal single-precision floating-point value represented
254by the denormalized significand `aSig'.  The normalized exponent and
255significand are stored at the locations pointed to by `zExpPtr' and
256`zSigPtr', respectively.
257-------------------------------------------------------------------------------
258*/
259static void
260 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
261{
262    int8 shiftCount;
263
264    shiftCount = countLeadingZeros32( aSig ) - 8;
265    *zSigPtr = aSig<<shiftCount;
266    *zExpPtr = 1 - shiftCount;
267
268}
269
270/*
271-------------------------------------------------------------------------------
272Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
273single-precision floating-point value, returning the result.  After being
274shifted into the proper positions, the three fields are simply added
275together to form the result.  This means that any integer portion of `zSig'
276will be added into the exponent.  Since a properly normalized significand
277will have an integer portion equal to 1, the `zExp' input should be 1 less
278than the desired result exponent whenever `zSig' is a complete, normalized
279significand.
280-------------------------------------------------------------------------------
281*/
282INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
283{
284
285    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
286
287}
288
289/*
290-------------------------------------------------------------------------------
291Takes an abstract floating-point value having sign `zSign', exponent `zExp',
292and significand `zSig', and returns the proper single-precision floating-
293point value corresponding to the abstract input.  Ordinarily, the abstract
294value is simply rounded and packed into the single-precision format, with
295the inexact exception raised if the abstract input cannot be represented
296exactly.  However, if the abstract value is too large, the overflow and
297inexact exceptions are raised and an infinity or maximal finite value is
298returned.  If the abstract value is too small, the input value is rounded to
299a subnormal number, and the underflow and inexact exceptions are raised if
300the abstract input cannot be represented exactly as a subnormal single-
301precision floating-point number.
302    The input significand `zSig' has its binary point between bits 30
303and 29, which is 7 bits to the left of the usual location.  This shifted
304significand must be normalized or smaller.  If `zSig' is not normalized,
305`zExp' must be 0; in that case, the result returned is a subnormal number,
306and it must not require rounding.  In the usual case that `zSig' is
307normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
308The handling of underflow and overflow follows the IEC/IEEE Standard for
309Binary Floating-Point Arithmetic.
310-------------------------------------------------------------------------------
311*/
312static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
313{
314    int8 roundingMode;
315    flag roundNearestEven;
316    int8 roundIncrement, roundBits;
317    flag isTiny;
318
319    roundingMode = float_rounding_mode;
320    roundNearestEven = ( roundingMode == float_round_nearest_even );
321    roundIncrement = 0x40;
322    if ( ! roundNearestEven ) {
323        if ( roundingMode == float_round_to_zero ) {
324            roundIncrement = 0;
325        }
326        else {
327            roundIncrement = 0x7F;
328            if ( zSign ) {
329                if ( roundingMode == float_round_up ) roundIncrement = 0;
330            }
331            else {
332                if ( roundingMode == float_round_down ) roundIncrement = 0;
333            }
334        }
335    }
336    roundBits = zSig & 0x7F;
337    if ( 0xFD <= (bits16) zExp ) {
338        if (    ( 0xFD < zExp )
339             || (    ( zExp == 0xFD )
340                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
341           ) {
342            float_raise( float_flag_overflow | float_flag_inexact );
343            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
344        }
345        if ( zExp < 0 ) {
346            isTiny =
347                   ( float_detect_tininess == float_tininess_before_rounding )
348                || ( zExp < -1 )
349                || ( zSig + roundIncrement < 0x80000000U );
350            shift32RightJamming( zSig, - zExp, &zSig );
351            zExp = 0;
352            roundBits = zSig & 0x7F;
353            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
354        }
355    }
356    if ( roundBits ) set_float_exception_inexact_flag();
357    zSig = ( zSig + roundIncrement )>>7;
358    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
359    if ( zSig == 0 ) zExp = 0;
360    return packFloat32( zSign, zExp, zSig );
361
362}
363
364/*
365-------------------------------------------------------------------------------
366Takes an abstract floating-point value having sign `zSign', exponent `zExp',
367and significand `zSig', and returns the proper single-precision floating-
368point value corresponding to the abstract input.  This routine is just like
369`roundAndPackFloat32' except that `zSig' does not have to be normalized.
370Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
371floating-point exponent.
372-------------------------------------------------------------------------------
373*/
374static float32
375 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
376{
377    int8 shiftCount;
378
379    shiftCount = countLeadingZeros32( zSig ) - 1;
380    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
381
382}
383
384/*
385-------------------------------------------------------------------------------
386Returns the fraction bits of the double-precision floating-point value `a'.
387-------------------------------------------------------------------------------
388*/
389INLINE bits64 extractFloat64Frac( float64 a )
390{
391
392    return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
393
394}
395
396/*
397-------------------------------------------------------------------------------
398Returns the exponent bits of the double-precision floating-point value `a'.
399-------------------------------------------------------------------------------
400*/
401INLINE int16 extractFloat64Exp( float64 a )
402{
403
404    return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
405
406}
407
408/*
409-------------------------------------------------------------------------------
410Returns the sign bit of the double-precision floating-point value `a'.
411-------------------------------------------------------------------------------
412*/
413INLINE flag extractFloat64Sign( float64 a )
414{
415
416    return (flag)(FLOAT64_DEMANGLE(a) >> 63);
417
418}
419
420/*
421-------------------------------------------------------------------------------
422Normalizes the subnormal double-precision floating-point value represented
423by the denormalized significand `aSig'.  The normalized exponent and
424significand are stored at the locations pointed to by `zExpPtr' and
425`zSigPtr', respectively.
426-------------------------------------------------------------------------------
427*/
428static void
429 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
430{
431    int8 shiftCount;
432
433    shiftCount = countLeadingZeros64( aSig ) - 11;
434    *zSigPtr = aSig<<shiftCount;
435    *zExpPtr = 1 - shiftCount;
436
437}
438
439/*
440-------------------------------------------------------------------------------
441Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
442double-precision floating-point value, returning the result.  After being
443shifted into the proper positions, the three fields are simply added
444together to form the result.  This means that any integer portion of `zSig'
445will be added into the exponent.  Since a properly normalized significand
446will have an integer portion equal to 1, the `zExp' input should be 1 less
447than the desired result exponent whenever `zSig' is a complete, normalized
448significand.
449-------------------------------------------------------------------------------
450*/
451INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
452{
453
454    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
455			   ( ( (bits64) zExp )<<52 ) + zSig );
456
457}
458
459/*
460-------------------------------------------------------------------------------
461Takes an abstract floating-point value having sign `zSign', exponent `zExp',
462and significand `zSig', and returns the proper double-precision floating-
463point value corresponding to the abstract input.  Ordinarily, the abstract
464value is simply rounded and packed into the double-precision format, with
465the inexact exception raised if the abstract input cannot be represented
466exactly.  However, if the abstract value is too large, the overflow and
467inexact exceptions are raised and an infinity or maximal finite value is
468returned.  If the abstract value is too small, the input value is rounded to
469a subnormal number, and the underflow and inexact exceptions are raised if
470the abstract input cannot be represented exactly as a subnormal double-
471precision floating-point number.
472    The input significand `zSig' has its binary point between bits 62
473and 61, which is 10 bits to the left of the usual location.  This shifted
474significand must be normalized or smaller.  If `zSig' is not normalized,
475`zExp' must be 0; in that case, the result returned is a subnormal number,
476and it must not require rounding.  In the usual case that `zSig' is
477normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
478The handling of underflow and overflow follows the IEC/IEEE Standard for
479Binary Floating-Point Arithmetic.
480-------------------------------------------------------------------------------
481*/
482static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
483{
484    int8 roundingMode;
485    flag roundNearestEven;
486    int16 roundIncrement, roundBits;
487    flag isTiny;
488
489    roundingMode = float_rounding_mode;
490    roundNearestEven = ( roundingMode == float_round_nearest_even );
491    roundIncrement = 0x200;
492    if ( ! roundNearestEven ) {
493        if ( roundingMode == float_round_to_zero ) {
494            roundIncrement = 0;
495        }
496        else {
497            roundIncrement = 0x3FF;
498            if ( zSign ) {
499                if ( roundingMode == float_round_up ) roundIncrement = 0;
500            }
501            else {
502                if ( roundingMode == float_round_down ) roundIncrement = 0;
503            }
504        }
505    }
506    roundBits = (int16)(zSig & 0x3FF);
507    if ( 0x7FD <= (bits16) zExp ) {
508        if (    ( 0x7FD < zExp )
509             || (    ( zExp == 0x7FD )
510                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
511           ) {
512            float_raise( float_flag_overflow | float_flag_inexact );
513            return FLOAT64_MANGLE(
514		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
515		( roundIncrement == 0 ));
516        }
517        if ( zExp < 0 ) {
518            isTiny =
519                   ( float_detect_tininess == float_tininess_before_rounding )
520                || ( zExp < -1 )
521                || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) );
522            shift64RightJamming( zSig, - zExp, &zSig );
523            zExp = 0;
524            roundBits = (int16)(zSig & 0x3FF);
525            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
526        }
527    }
528    if ( roundBits ) set_float_exception_inexact_flag();
529    zSig = ( zSig + roundIncrement )>>10;
530    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
531    if ( zSig == 0 ) zExp = 0;
532    return packFloat64( zSign, zExp, zSig );
533
534}
535
536/*
537-------------------------------------------------------------------------------
538Takes an abstract floating-point value having sign `zSign', exponent `zExp',
539and significand `zSig', and returns the proper double-precision floating-
540point value corresponding to the abstract input.  This routine is just like
541`roundAndPackFloat64' except that `zSig' does not have to be normalized.
542Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
543floating-point exponent.
544-------------------------------------------------------------------------------
545*/
546static float64
547 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
548{
549    int8 shiftCount;
550
551    shiftCount = countLeadingZeros64( zSig ) - 1;
552    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
553
554}
555
556#ifdef FLOATX80
557
558/*
559-------------------------------------------------------------------------------
560Returns the fraction bits of the extended double-precision floating-point
561value `a'.
562-------------------------------------------------------------------------------
563*/
564INLINE bits64 extractFloatx80Frac( floatx80 a )
565{
566
567    return a.low;
568
569}
570
571/*
572-------------------------------------------------------------------------------
573Returns the exponent bits of the extended double-precision floating-point
574value `a'.
575-------------------------------------------------------------------------------
576*/
577INLINE int32 extractFloatx80Exp( floatx80 a )
578{
579
580    return a.high & 0x7FFF;
581
582}
583
584/*
585-------------------------------------------------------------------------------
586Returns the sign bit of the extended double-precision floating-point value
587`a'.
588-------------------------------------------------------------------------------
589*/
590INLINE flag extractFloatx80Sign( floatx80 a )
591{
592
593    return a.high>>15;
594
595}
596
597/*
598-------------------------------------------------------------------------------
599Normalizes the subnormal extended double-precision floating-point value
600represented by the denormalized significand `aSig'.  The normalized exponent
601and significand are stored at the locations pointed to by `zExpPtr' and
602`zSigPtr', respectively.
603-------------------------------------------------------------------------------
604*/
605static void
606 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
607{
608    int8 shiftCount;
609
610    shiftCount = countLeadingZeros64( aSig );
611    *zSigPtr = aSig<<shiftCount;
612    *zExpPtr = 1 - shiftCount;
613
614}
615
616/*
617-------------------------------------------------------------------------------
618Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
619extended double-precision floating-point value, returning the result.
620-------------------------------------------------------------------------------
621*/
622INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
623{
624    floatx80 z;
625
626    z.low = zSig;
627    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
628    return z;
629
630}
631
632/*
633-------------------------------------------------------------------------------
634Takes an abstract floating-point value having sign `zSign', exponent `zExp',
635and extended significand formed by the concatenation of `zSig0' and `zSig1',
636and returns the proper extended double-precision floating-point value
637corresponding to the abstract input.  Ordinarily, the abstract value is
638rounded and packed into the extended double-precision format, with the
639inexact exception raised if the abstract input cannot be represented
640exactly.  However, if the abstract value is too large, the overflow and
641inexact exceptions are raised and an infinity or maximal finite value is
642returned.  If the abstract value is too small, the input value is rounded to
643a subnormal number, and the underflow and inexact exceptions are raised if
644the abstract input cannot be represented exactly as a subnormal extended
645double-precision floating-point number.
646    If `roundingPrecision' is 32 or 64, the result is rounded to the same
647number of bits as single or double precision, respectively.  Otherwise, the
648result is rounded to the full precision of the extended double-precision
649format.
650    The input significand must be normalized or smaller.  If the input
651significand is not normalized, `zExp' must be 0; in that case, the result
652returned is a subnormal number, and it must not require rounding.  The
653handling of underflow and overflow follows the IEC/IEEE Standard for Binary
654Floating-Point Arithmetic.
655-------------------------------------------------------------------------------
656*/
657static floatx80
658 roundAndPackFloatx80(
659     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
660 )
661{
662    int8 roundingMode;
663    flag roundNearestEven, increment, isTiny;
664    uint64 roundIncrement, roundMask, roundBits;
665
666    roundingMode = float_rounding_mode;
667    roundNearestEven = ( roundingMode == float_round_nearest_even );
668    if ( roundingPrecision == 80 ) goto precision80;
669    if ( roundingPrecision == 64 ) {
670        roundIncrement = LIT64( 0x0000000000000400 );
671        roundMask = LIT64( 0x00000000000007FF );
672    }
673    else if ( roundingPrecision == 32 ) {
674        roundIncrement = LIT64( 0x0000008000000000 );
675        roundMask = LIT64( 0x000000FFFFFFFFFF );
676    }
677    else {
678        goto precision80;
679    }
680    zSig0 |= ( zSig1 != 0 );
681    if ( ! roundNearestEven ) {
682        if ( roundingMode == float_round_to_zero ) {
683            roundIncrement = 0;
684        }
685        else {
686            roundIncrement = roundMask;
687            if ( zSign ) {
688                if ( roundingMode == float_round_up ) roundIncrement = 0;
689            }
690            else {
691                if ( roundingMode == float_round_down ) roundIncrement = 0;
692            }
693        }
694    }
695    roundBits = zSig0 & roundMask;
696    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
697        if (    ( 0x7FFE < zExp )
698             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
699           ) {
700            goto overflow;
701        }
702        if ( zExp <= 0 ) {
703            isTiny =
704                   ( float_detect_tininess == float_tininess_before_rounding )
705                || ( zExp < 0 )
706                || ( zSig0 <= zSig0 + roundIncrement );
707            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
708            zExp = 0;
709            roundBits = zSig0 & roundMask;
710            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
711            if ( roundBits ) set_float_exception_inexact_flag();
712            zSig0 += roundIncrement;
713            if ( (sbits64) zSig0 < 0 ) zExp = 1;
714            roundIncrement = roundMask + 1;
715            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
716                roundMask |= roundIncrement;
717            }
718            zSig0 &= ~ roundMask;
719            return packFloatx80( zSign, zExp, zSig0 );
720        }
721    }
722    if ( roundBits ) set_float_exception_inexact_flag();
723    zSig0 += roundIncrement;
724    if ( zSig0 < roundIncrement ) {
725        ++zExp;
726        zSig0 = LIT64( 0x8000000000000000 );
727    }
728    roundIncrement = roundMask + 1;
729    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
730        roundMask |= roundIncrement;
731    }
732    zSig0 &= ~ roundMask;
733    if ( zSig0 == 0 ) zExp = 0;
734    return packFloatx80( zSign, zExp, zSig0 );
735 precision80:
736    increment = ( (sbits64) zSig1 < 0 );
737    if ( ! roundNearestEven ) {
738        if ( roundingMode == float_round_to_zero ) {
739            increment = 0;
740        }
741        else {
742            if ( zSign ) {
743                increment = ( roundingMode == float_round_down ) && zSig1;
744            }
745            else {
746                increment = ( roundingMode == float_round_up ) && zSig1;
747            }
748        }
749    }
750    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
751        if (    ( 0x7FFE < zExp )
752             || (    ( zExp == 0x7FFE )
753                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
754                  && increment
755                )
756           ) {
757            roundMask = 0;
758 overflow:
759            float_raise( float_flag_overflow | float_flag_inexact );
760            if (    ( roundingMode == float_round_to_zero )
761                 || ( zSign && ( roundingMode == float_round_up ) )
762                 || ( ! zSign && ( roundingMode == float_round_down ) )
763               ) {
764                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
765            }
766            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
767        }
768        if ( zExp <= 0 ) {
769            isTiny =
770                   ( float_detect_tininess == float_tininess_before_rounding )
771                || ( zExp < 0 )
772                || ! increment
773                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
774            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
775            zExp = 0;
776            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
777            if ( zSig1 ) set_float_exception_inexact_flag();
778            if ( roundNearestEven ) {
779                increment = ( (sbits64) zSig1 < 0 );
780            }
781            else {
782                if ( zSign ) {
783                    increment = ( roundingMode == float_round_down ) && zSig1;
784                }
785                else {
786                    increment = ( roundingMode == float_round_up ) && zSig1;
787                }
788            }
789            if ( increment ) {
790                ++zSig0;
791                zSig0 &=
792                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
793                if ( (sbits64) zSig0 < 0 ) zExp = 1;
794            }
795            return packFloatx80( zSign, zExp, zSig0 );
796        }
797    }
798    if ( zSig1 ) set_float_exception_inexact_flag();
799    if ( increment ) {
800        ++zSig0;
801        if ( zSig0 == 0 ) {
802            ++zExp;
803            zSig0 = LIT64( 0x8000000000000000 );
804        }
805        else {
806            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
807        }
808    }
809    else {
810        if ( zSig0 == 0 ) zExp = 0;
811    }
812    return packFloatx80( zSign, zExp, zSig0 );
813
814}
815
816/*
817-------------------------------------------------------------------------------
818Takes an abstract floating-point value having sign `zSign', exponent
819`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
820and returns the proper extended double-precision floating-point value
821corresponding to the abstract input.  This routine is just like
822`roundAndPackFloatx80' except that the input significand does not have to be
823normalized.
824-------------------------------------------------------------------------------
825*/
826static floatx80
827 normalizeRoundAndPackFloatx80(
828     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
829 )
830{
831    int8 shiftCount;
832
833    if ( zSig0 == 0 ) {
834        zSig0 = zSig1;
835        zSig1 = 0;
836        zExp -= 64;
837    }
838    shiftCount = countLeadingZeros64( zSig0 );
839    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
840    zExp -= shiftCount;
841    return
842        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
843
844}
845
846#endif
847
848#ifdef FLOAT128
849
850/*
851-------------------------------------------------------------------------------
852Returns the least-significant 64 fraction bits of the quadruple-precision
853floating-point value `a'.
854-------------------------------------------------------------------------------
855*/
856INLINE bits64 extractFloat128Frac1( float128 a )
857{
858
859    return a.low;
860
861}
862
863/*
864-------------------------------------------------------------------------------
865Returns the most-significant 48 fraction bits of the quadruple-precision
866floating-point value `a'.
867-------------------------------------------------------------------------------
868*/
869INLINE bits64 extractFloat128Frac0( float128 a )
870{
871
872    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
873
874}
875
876/*
877-------------------------------------------------------------------------------
878Returns the exponent bits of the quadruple-precision floating-point value
879`a'.
880-------------------------------------------------------------------------------
881*/
882INLINE int32 extractFloat128Exp( float128 a )
883{
884
885    return (int32)((a.high >> 48) & 0x7FFF);
886
887}
888
889/*
890-------------------------------------------------------------------------------
891Returns the sign bit of the quadruple-precision floating-point value `a'.
892-------------------------------------------------------------------------------
893*/
894INLINE flag extractFloat128Sign( float128 a )
895{
896
897    return (flag)(a.high >> 63);
898
899}
900
901/*
902-------------------------------------------------------------------------------
903Normalizes the subnormal quadruple-precision floating-point value
904represented by the denormalized significand formed by the concatenation of
905`aSig0' and `aSig1'.  The normalized exponent is stored at the location
906pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
907significand are stored at the location pointed to by `zSig0Ptr', and the
908least significant 64 bits of the normalized significand are stored at the
909location pointed to by `zSig1Ptr'.
910-------------------------------------------------------------------------------
911*/
912static void
913 normalizeFloat128Subnormal(
914     bits64 aSig0,
915     bits64 aSig1,
916     int32 *zExpPtr,
917     bits64 *zSig0Ptr,
918     bits64 *zSig1Ptr
919 )
920{
921    int8 shiftCount;
922
923    if ( aSig0 == 0 ) {
924        shiftCount = countLeadingZeros64( aSig1 ) - 15;
925        if ( shiftCount < 0 ) {
926            *zSig0Ptr = aSig1>>( - shiftCount );
927            *zSig1Ptr = aSig1<<( shiftCount & 63 );
928        }
929        else {
930            *zSig0Ptr = aSig1<<shiftCount;
931            *zSig1Ptr = 0;
932        }
933        *zExpPtr = - shiftCount - 63;
934    }
935    else {
936        shiftCount = countLeadingZeros64( aSig0 ) - 15;
937        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
938        *zExpPtr = 1 - shiftCount;
939    }
940
941}
942
943/*
944-------------------------------------------------------------------------------
945Packs the sign `zSign', the exponent `zExp', and the significand formed
946by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
947floating-point value, returning the result.  After being shifted into the
948proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
949added together to form the most significant 32 bits of the result.  This
950means that any integer portion of `zSig0' will be added into the exponent.
951Since a properly normalized significand will have an integer portion equal
952to 1, the `zExp' input should be 1 less than the desired result exponent
953whenever `zSig0' and `zSig1' concatenated form a complete, normalized
954significand.
955-------------------------------------------------------------------------------
956*/
957INLINE float128
958 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
959{
960    float128 z;
961
962    z.low = zSig1;
963    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
964    return z;
965
966}
967
968/*
969-------------------------------------------------------------------------------
970Takes an abstract floating-point value having sign `zSign', exponent `zExp',
971and extended significand formed by the concatenation of `zSig0', `zSig1',
972and `zSig2', and returns the proper quadruple-precision floating-point value
973corresponding to the abstract input.  Ordinarily, the abstract value is
974simply rounded and packed into the quadruple-precision format, with the
975inexact exception raised if the abstract input cannot be represented
976exactly.  However, if the abstract value is too large, the overflow and
977inexact exceptions are raised and an infinity or maximal finite value is
978returned.  If the abstract value is too small, the input value is rounded to
979a subnormal number, and the underflow and inexact exceptions are raised if
980the abstract input cannot be represented exactly as a subnormal quadruple-
981precision floating-point number.
982    The input significand must be normalized or smaller.  If the input
983significand is not normalized, `zExp' must be 0; in that case, the result
984returned is a subnormal number, and it must not require rounding.  In the
985usual case that the input significand is normalized, `zExp' must be 1 less
986than the ``true'' floating-point exponent.  The handling of underflow and
987overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
988-------------------------------------------------------------------------------
989*/
990static float128
991 roundAndPackFloat128(
992     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
993{
994    int8 roundingMode;
995    flag roundNearestEven, increment, isTiny;
996
997    roundingMode = float_rounding_mode;
998    roundNearestEven = ( roundingMode == float_round_nearest_even );
999    increment = ( (sbits64) zSig2 < 0 );
1000    if ( ! roundNearestEven ) {
1001        if ( roundingMode == float_round_to_zero ) {
1002            increment = 0;
1003        }
1004        else {
1005            if ( zSign ) {
1006                increment = ( roundingMode == float_round_down ) && zSig2;
1007            }
1008            else {
1009                increment = ( roundingMode == float_round_up ) && zSig2;
1010            }
1011        }
1012    }
1013    if ( 0x7FFD <= (bits32) zExp ) {
1014        if (    ( 0x7FFD < zExp )
1015             || (    ( zExp == 0x7FFD )
1016                  && eq128(
1017                         LIT64( 0x0001FFFFFFFFFFFF ),
1018                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1019                         zSig0,
1020                         zSig1
1021                     )
1022                  && increment
1023                )
1024           ) {
1025            float_raise( float_flag_overflow | float_flag_inexact );
1026            if (    ( roundingMode == float_round_to_zero )
1027                 || ( zSign && ( roundingMode == float_round_up ) )
1028                 || ( ! zSign && ( roundingMode == float_round_down ) )
1029               ) {
1030                return
1031                    packFloat128(
1032                        zSign,
1033                        0x7FFE,
1034                        LIT64( 0x0000FFFFFFFFFFFF ),
1035                        LIT64( 0xFFFFFFFFFFFFFFFF )
1036                    );
1037            }
1038            return packFloat128( zSign, 0x7FFF, 0, 0 );
1039        }
1040        if ( zExp < 0 ) {
1041            isTiny =
1042                   ( float_detect_tininess == float_tininess_before_rounding )
1043                || ( zExp < -1 )
1044                || ! increment
1045                || lt128(
1046                       zSig0,
1047                       zSig1,
1048                       LIT64( 0x0001FFFFFFFFFFFF ),
1049                       LIT64( 0xFFFFFFFFFFFFFFFF )
1050                   );
1051            shift128ExtraRightJamming(
1052                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1053            zExp = 0;
1054            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1055            if ( roundNearestEven ) {
1056                increment = ( (sbits64) zSig2 < 0 );
1057            }
1058            else {
1059                if ( zSign ) {
1060                    increment = ( roundingMode == float_round_down ) && zSig2;
1061                }
1062                else {
1063                    increment = ( roundingMode == float_round_up ) && zSig2;
1064                }
1065            }
1066        }
1067    }
1068    if ( zSig2 ) set_float_exception_inexact_flag();
1069    if ( increment ) {
1070        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1071        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1072    }
1073    else {
1074        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1075    }
1076    return packFloat128( zSign, zExp, zSig0, zSig1 );
1077
1078}
1079
1080/*
1081-------------------------------------------------------------------------------
1082Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1083and significand formed by the concatenation of `zSig0' and `zSig1', and
1084returns the proper quadruple-precision floating-point value corresponding
1085to the abstract input.  This routine is just like `roundAndPackFloat128'
1086except that the input significand has fewer bits and does not have to be
1087normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1088point exponent.
1089-------------------------------------------------------------------------------
1090*/
1091static float128
1092 normalizeRoundAndPackFloat128(
1093     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1094{
1095    int8 shiftCount;
1096    bits64 zSig2;
1097
1098    if ( zSig0 == 0 ) {
1099        zSig0 = zSig1;
1100        zSig1 = 0;
1101        zExp -= 64;
1102    }
1103    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1104    if ( 0 <= shiftCount ) {
1105        zSig2 = 0;
1106        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1107    }
1108    else {
1109        shift128ExtraRightJamming(
1110            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1111    }
1112    zExp -= shiftCount;
1113    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1114
1115}
1116
1117#endif
1118
1119/*
1120-------------------------------------------------------------------------------
1121Returns the result of converting the 32-bit two's complement integer `a'
1122to the single-precision floating-point format.  The conversion is performed
1123according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1124-------------------------------------------------------------------------------
1125*/
1126float32 int32_to_float32( int32 a )
1127{
1128    flag zSign;
1129
1130    if ( a == 0 ) return 0;
1131    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1132    zSign = ( a < 0 );
1133    return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
1134
1135}
1136
1137float32 uint32_to_float32( uint32 a )
1138{
1139    if ( a == 0 ) return 0;
1140    if ( a & (bits32) 0x80000000 )
1141	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
1142    return normalizeRoundAndPackFloat32( 0, 0x9C, a );
1143}
1144
1145
1146/*
1147-------------------------------------------------------------------------------
1148Returns the result of converting the 32-bit two's complement integer `a'
1149to the double-precision floating-point format.  The conversion is performed
1150according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1151-------------------------------------------------------------------------------
1152*/
1153float64 int32_to_float64( int32 a )
1154{
1155    flag zSign;
1156    uint32 absA;
1157    int8 shiftCount;
1158    bits64 zSig;
1159
1160    if ( a == 0 ) return 0;
1161    zSign = ( a < 0 );
1162    absA = zSign ? - a : a;
1163    shiftCount = countLeadingZeros32( absA ) + 21;
1164    zSig = absA;
1165    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1166
1167}
1168
1169float64 uint32_to_float64( uint32 a )
1170{
1171    int8 shiftCount;
1172    bits64 zSig = a;
1173
1174    if ( a == 0 ) return 0;
1175    shiftCount = countLeadingZeros32( a ) + 21;
1176    return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
1177
1178}
1179
1180#ifdef FLOATX80
1181
1182/*
1183-------------------------------------------------------------------------------
1184Returns the result of converting the 32-bit two's complement integer `a'
1185to the extended double-precision floating-point format.  The conversion
1186is performed according to the IEC/IEEE Standard for Binary Floating-Point
1187Arithmetic.
1188-------------------------------------------------------------------------------
1189*/
1190floatx80 int32_to_floatx80( int32 a )
1191{
1192    flag zSign;
1193    uint32 absA;
1194    int8 shiftCount;
1195    bits64 zSig;
1196
1197    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1198    zSign = ( a < 0 );
1199    absA = zSign ? - a : a;
1200    shiftCount = countLeadingZeros32( absA ) + 32;
1201    zSig = absA;
1202    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1203
1204}
1205
1206floatx80 uint32_to_floatx80( uint32 a )
1207{
1208    int8 shiftCount;
1209    bits64 zSig = a;
1210
1211    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1212    shiftCount = countLeadingZeros32( a ) + 32;
1213    return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
1214
1215}
1216
1217#endif
1218
1219#ifdef FLOAT128
1220
1221/*
1222-------------------------------------------------------------------------------
1223Returns the result of converting the 32-bit two's complement integer `a' to
1224the quadruple-precision floating-point format.  The conversion is performed
1225according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1226-------------------------------------------------------------------------------
1227*/
1228float128 int32_to_float128( int32 a )
1229{
1230    flag zSign;
1231    uint32 absA;
1232    int8 shiftCount;
1233    bits64 zSig0;
1234
1235    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1236    zSign = ( a < 0 );
1237    absA = zSign ? - a : a;
1238    shiftCount = countLeadingZeros32( absA ) + 17;
1239    zSig0 = absA;
1240    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1241
1242}
1243
1244float128 uint32_to_float128( uint32 a )
1245{
1246    int8 shiftCount;
1247    bits64 zSig0 = a;
1248
1249    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1250    shiftCount = countLeadingZeros32( a ) + 17;
1251    return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1252
1253}
1254
1255#endif
1256
1257#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1258/*
1259-------------------------------------------------------------------------------
1260Returns the result of converting the 64-bit two's complement integer `a'
1261to the single-precision floating-point format.  The conversion is performed
1262according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1263-------------------------------------------------------------------------------
1264*/
1265float32 int64_to_float32( int64 a )
1266{
1267    flag zSign;
1268    uint64 absA;
1269    int8 shiftCount;
1270
1271    if ( a == 0 ) return 0;
1272    zSign = ( a < 0 );
1273    absA = zSign ? - a : a;
1274    shiftCount = countLeadingZeros64( absA ) - 40;
1275    if ( 0 <= shiftCount ) {
1276        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1277    }
1278    else {
1279        shiftCount += 7;
1280        if ( shiftCount < 0 ) {
1281            shift64RightJamming( absA, - shiftCount, &absA );
1282        }
1283        else {
1284            absA <<= shiftCount;
1285        }
1286        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1287    }
1288
1289}
1290
1291/*
1292-------------------------------------------------------------------------------
1293Returns the result of converting the 64-bit two's complement integer `a'
1294to the double-precision floating-point format.  The conversion is performed
1295according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1296-------------------------------------------------------------------------------
1297*/
1298float64 int64_to_float64( int64 a )
1299{
1300    flag zSign;
1301
1302    if ( a == 0 ) return 0;
1303    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1304        return packFloat64( 1, 0x43E, 0 );
1305    }
1306    zSign = ( a < 0 );
1307    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1308
1309}
1310
1311#ifdef FLOATX80
1312
1313/*
1314-------------------------------------------------------------------------------
1315Returns the result of converting the 64-bit two's complement integer `a'
1316to the extended double-precision floating-point format.  The conversion
1317is performed according to the IEC/IEEE Standard for Binary Floating-Point
1318Arithmetic.
1319-------------------------------------------------------------------------------
1320*/
1321floatx80 int64_to_floatx80( int64 a )
1322{
1323    flag zSign;
1324    uint64 absA;
1325    int8 shiftCount;
1326
1327    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1328    zSign = ( a < 0 );
1329    absA = zSign ? - a : a;
1330    shiftCount = countLeadingZeros64( absA );
1331    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1332
1333}
1334
1335#endif
1336
1337#endif /* !SOFTFLOAT_FOR_GCC */
1338
1339#ifdef FLOAT128
1340
1341/*
1342-------------------------------------------------------------------------------
1343Returns the result of converting the 64-bit two's complement integer `a' to
1344the quadruple-precision floating-point format.  The conversion is performed
1345according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1346-------------------------------------------------------------------------------
1347*/
1348float128 int64_to_float128( int64 a )
1349{
1350    flag zSign;
1351    uint64 absA;
1352    int8 shiftCount;
1353    int32 zExp;
1354    bits64 zSig0, zSig1;
1355
1356    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1357    zSign = ( a < 0 );
1358    absA = zSign ? - a : a;
1359    shiftCount = countLeadingZeros64( absA ) + 49;
1360    zExp = 0x406E - shiftCount;
1361    if ( 64 <= shiftCount ) {
1362        zSig1 = 0;
1363        zSig0 = absA;
1364        shiftCount -= 64;
1365    }
1366    else {
1367        zSig1 = absA;
1368        zSig0 = 0;
1369    }
1370    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1371    return packFloat128( zSign, zExp, zSig0, zSig1 );
1372
1373}
1374
1375#endif
1376
1377#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1378/*
1379-------------------------------------------------------------------------------
1380Returns the result of converting the single-precision floating-point value
1381`a' to the 32-bit two's complement integer format.  The conversion is
1382performed according to the IEC/IEEE Standard for Binary Floating-Point
1383Arithmetic---which means in particular that the conversion is rounded
1384according to the current rounding mode.  If `a' is a NaN, the largest
1385positive integer is returned.  Otherwise, if the conversion overflows, the
1386largest integer with the same sign as `a' is returned.
1387-------------------------------------------------------------------------------
1388*/
1389int32 float32_to_int32( float32 a )
1390{
1391    flag aSign;
1392    int16 aExp, shiftCount;
1393    bits32 aSig;
1394    bits64 aSig64;
1395
1396    aSig = extractFloat32Frac( a );
1397    aExp = extractFloat32Exp( a );
1398    aSign = extractFloat32Sign( a );
1399    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1400    if ( aExp ) aSig |= 0x00800000;
1401    shiftCount = 0xAF - aExp;
1402    aSig64 = aSig;
1403    aSig64 <<= 32;
1404    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1405    return roundAndPackInt32( aSign, aSig64 );
1406
1407}
1408#endif /* !SOFTFLOAT_FOR_GCC */
1409
1410/*
1411-------------------------------------------------------------------------------
1412Returns the result of converting the single-precision floating-point value
1413`a' to the 32-bit two's complement integer format.  The conversion is
1414performed according to the IEC/IEEE Standard for Binary Floating-Point
1415Arithmetic, except that the conversion is always rounded toward zero.
1416If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1417the conversion overflows, the largest integer with the same sign as `a' is
1418returned.
1419-------------------------------------------------------------------------------
1420*/
1421int32 float32_to_int32_round_to_zero( float32 a )
1422{
1423    flag aSign;
1424    int16 aExp, shiftCount;
1425    bits32 aSig;
1426    int32 z;
1427
1428    aSig = extractFloat32Frac( a );
1429    aExp = extractFloat32Exp( a );
1430    aSign = extractFloat32Sign( a );
1431    shiftCount = aExp - 0x9E;
1432    if ( 0 <= shiftCount ) {
1433        if ( a != 0xCF000000 ) {
1434            float_raise( float_flag_invalid );
1435            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1436        }
1437        return (sbits32) 0x80000000;
1438    }
1439    else if ( aExp <= 0x7E ) {
1440        if ( aExp | aSig ) set_float_exception_inexact_flag();
1441        return 0;
1442    }
1443    aSig = ( aSig | 0x00800000 )<<8;
1444    z = aSig>>( - shiftCount );
1445    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1446        set_float_exception_inexact_flag();
1447    }
1448    if ( aSign ) z = - z;
1449    return z;
1450
1451}
1452
1453#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1454/*
1455-------------------------------------------------------------------------------
1456Returns the result of converting the single-precision floating-point value
1457`a' to the 64-bit two's complement integer format.  The conversion is
1458performed according to the IEC/IEEE Standard for Binary Floating-Point
1459Arithmetic---which means in particular that the conversion is rounded
1460according to the current rounding mode.  If `a' is a NaN, the largest
1461positive integer is returned.  Otherwise, if the conversion overflows, the
1462largest integer with the same sign as `a' is returned.
1463-------------------------------------------------------------------------------
1464*/
1465int64 float32_to_int64( float32 a )
1466{
1467    flag aSign;
1468    int16 aExp, shiftCount;
1469    bits32 aSig;
1470    bits64 aSig64, aSigExtra;
1471
1472    aSig = extractFloat32Frac( a );
1473    aExp = extractFloat32Exp( a );
1474    aSign = extractFloat32Sign( a );
1475    shiftCount = 0xBE - aExp;
1476    if ( shiftCount < 0 ) {
1477        float_raise( float_flag_invalid );
1478        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1479            return LIT64( 0x7FFFFFFFFFFFFFFF );
1480        }
1481        return (sbits64) LIT64( 0x8000000000000000 );
1482    }
1483    if ( aExp ) aSig |= 0x00800000;
1484    aSig64 = aSig;
1485    aSig64 <<= 40;
1486    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1487    return roundAndPackInt64( aSign, aSig64, aSigExtra );
1488
1489}
1490
1491/*
1492-------------------------------------------------------------------------------
1493Returns the result of converting the single-precision floating-point value
1494`a' to the 64-bit two's complement integer format.  The conversion is
1495performed according to the IEC/IEEE Standard for Binary Floating-Point
1496Arithmetic, except that the conversion is always rounded toward zero.  If
1497`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1498conversion overflows, the largest integer with the same sign as `a' is
1499returned.
1500-------------------------------------------------------------------------------
1501*/
1502int64 float32_to_int64_round_to_zero( float32 a )
1503{
1504    flag aSign;
1505    int16 aExp, shiftCount;
1506    bits32 aSig;
1507    bits64 aSig64;
1508    int64 z;
1509
1510    aSig = extractFloat32Frac( a );
1511    aExp = extractFloat32Exp( a );
1512    aSign = extractFloat32Sign( a );
1513    shiftCount = aExp - 0xBE;
1514    if ( 0 <= shiftCount ) {
1515        if ( a != 0xDF000000 ) {
1516            float_raise( float_flag_invalid );
1517            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1518                return LIT64( 0x7FFFFFFFFFFFFFFF );
1519            }
1520        }
1521        return (sbits64) LIT64( 0x8000000000000000 );
1522    }
1523    else if ( aExp <= 0x7E ) {
1524        if ( aExp | aSig ) set_float_exception_inexact_flag();
1525        return 0;
1526    }
1527    aSig64 = aSig | 0x00800000;
1528    aSig64 <<= 40;
1529    z = aSig64>>( - shiftCount );
1530    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1531        set_float_exception_inexact_flag();
1532    }
1533    if ( aSign ) z = - z;
1534    return z;
1535
1536}
1537#endif /* !SOFTFLOAT_FOR_GCC */
1538
1539/*
1540-------------------------------------------------------------------------------
1541Returns the result of converting the single-precision floating-point value
1542`a' to the double-precision floating-point format.  The conversion is
1543performed according to the IEC/IEEE Standard for Binary Floating-Point
1544Arithmetic.
1545-------------------------------------------------------------------------------
1546*/
1547float64 float32_to_float64( float32 a )
1548{
1549    flag aSign;
1550    int16 aExp;
1551    bits32 aSig;
1552
1553    aSig = extractFloat32Frac( a );
1554    aExp = extractFloat32Exp( a );
1555    aSign = extractFloat32Sign( a );
1556    if ( aExp == 0xFF ) {
1557        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1558        return packFloat64( aSign, 0x7FF, 0 );
1559    }
1560    if ( aExp == 0 ) {
1561        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1562        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1563        --aExp;
1564    }
1565    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1566
1567}
1568
1569#ifdef FLOATX80
1570
1571/*
1572-------------------------------------------------------------------------------
1573Returns the result of converting the single-precision floating-point value
1574`a' to the extended double-precision floating-point format.  The conversion
1575is performed according to the IEC/IEEE Standard for Binary Floating-Point
1576Arithmetic.
1577-------------------------------------------------------------------------------
1578*/
1579floatx80 float32_to_floatx80( float32 a )
1580{
1581    flag aSign;
1582    int16 aExp;
1583    bits32 aSig;
1584
1585    aSig = extractFloat32Frac( a );
1586    aExp = extractFloat32Exp( a );
1587    aSign = extractFloat32Sign( a );
1588    if ( aExp == 0xFF ) {
1589        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1590        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1591    }
1592    if ( aExp == 0 ) {
1593        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1594        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1595    }
1596    aSig |= 0x00800000;
1597    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1598
1599}
1600
1601#endif
1602
1603#ifdef FLOAT128
1604
1605/*
1606-------------------------------------------------------------------------------
1607Returns the result of converting the single-precision floating-point value
1608`a' to the double-precision floating-point format.  The conversion is
1609performed according to the IEC/IEEE Standard for Binary Floating-Point
1610Arithmetic.
1611-------------------------------------------------------------------------------
1612*/
1613float128 float32_to_float128( float32 a )
1614{
1615    flag aSign;
1616    int16 aExp;
1617    bits32 aSig;
1618
1619    aSig = extractFloat32Frac( a );
1620    aExp = extractFloat32Exp( a );
1621    aSign = extractFloat32Sign( a );
1622    if ( aExp == 0xFF ) {
1623        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1624        return packFloat128( aSign, 0x7FFF, 0, 0 );
1625    }
1626    if ( aExp == 0 ) {
1627        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1628        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1629        --aExp;
1630    }
1631    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1632
1633}
1634
1635#endif
1636
1637#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1638/*
1639-------------------------------------------------------------------------------
1640Rounds the single-precision floating-point value `a' to an integer, and
1641returns the result as a single-precision floating-point value.  The
1642operation is performed according to the IEC/IEEE Standard for Binary
1643Floating-Point Arithmetic.
1644-------------------------------------------------------------------------------
1645*/
1646float32 float32_round_to_int( float32 a )
1647{
1648    flag aSign;
1649    int16 aExp;
1650    bits32 lastBitMask, roundBitsMask;
1651    int8 roundingMode;
1652    float32 z;
1653
1654    aExp = extractFloat32Exp( a );
1655    if ( 0x96 <= aExp ) {
1656        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1657            return propagateFloat32NaN( a, a );
1658        }
1659        return a;
1660    }
1661    if ( aExp <= 0x7E ) {
1662        if ( (bits32) ( a<<1 ) == 0 ) return a;
1663        set_float_exception_inexact_flag();
1664        aSign = extractFloat32Sign( a );
1665        switch ( float_rounding_mode ) {
1666         case float_round_nearest_even:
1667            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1668                return packFloat32( aSign, 0x7F, 0 );
1669            }
1670            break;
1671	 case float_round_to_zero:
1672	    break;
1673         case float_round_down:
1674            return aSign ? 0xBF800000 : 0;
1675         case float_round_up:
1676            return aSign ? 0x80000000 : 0x3F800000;
1677        }
1678        return packFloat32( aSign, 0, 0 );
1679    }
1680    lastBitMask = 1;
1681    lastBitMask <<= 0x96 - aExp;
1682    roundBitsMask = lastBitMask - 1;
1683    z = a;
1684    roundingMode = float_rounding_mode;
1685    if ( roundingMode == float_round_nearest_even ) {
1686        z += lastBitMask>>1;
1687        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1688    }
1689    else if ( roundingMode != float_round_to_zero ) {
1690        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1691            z += roundBitsMask;
1692        }
1693    }
1694    z &= ~ roundBitsMask;
1695    if ( z != a ) set_float_exception_inexact_flag();
1696    return z;
1697
1698}
1699#endif /* !SOFTFLOAT_FOR_GCC */
1700
1701/*
1702-------------------------------------------------------------------------------
1703Returns the result of adding the absolute values of the single-precision
1704floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1705before being returned.  `zSign' is ignored if the result is a NaN.
1706The addition is performed according to the IEC/IEEE Standard for Binary
1707Floating-Point Arithmetic.
1708-------------------------------------------------------------------------------
1709*/
1710static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1711{
1712    int16 aExp, bExp, zExp;
1713    bits32 aSig, bSig, zSig;
1714    int16 expDiff;
1715
1716    aSig = extractFloat32Frac( a );
1717    aExp = extractFloat32Exp( a );
1718    bSig = extractFloat32Frac( b );
1719    bExp = extractFloat32Exp( b );
1720    expDiff = aExp - bExp;
1721    aSig <<= 6;
1722    bSig <<= 6;
1723    if ( 0 < expDiff ) {
1724        if ( aExp == 0xFF ) {
1725            if ( aSig ) return propagateFloat32NaN( a, b );
1726            return a;
1727        }
1728        if ( bExp == 0 ) {
1729            --expDiff;
1730        }
1731        else {
1732            bSig |= 0x20000000;
1733        }
1734        shift32RightJamming( bSig, expDiff, &bSig );
1735        zExp = aExp;
1736    }
1737    else if ( expDiff < 0 ) {
1738        if ( bExp == 0xFF ) {
1739            if ( bSig ) return propagateFloat32NaN( a, b );
1740            return packFloat32( zSign, 0xFF, 0 );
1741        }
1742        if ( aExp == 0 ) {
1743            ++expDiff;
1744        }
1745        else {
1746            aSig |= 0x20000000;
1747        }
1748        shift32RightJamming( aSig, - expDiff, &aSig );
1749        zExp = bExp;
1750    }
1751    else {
1752        if ( aExp == 0xFF ) {
1753            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1754            return a;
1755        }
1756        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1757        zSig = 0x40000000 + aSig + bSig;
1758        zExp = aExp;
1759        goto roundAndPack;
1760    }
1761    aSig |= 0x20000000;
1762    zSig = ( aSig + bSig )<<1;
1763    --zExp;
1764    if ( (sbits32) zSig < 0 ) {
1765        zSig = aSig + bSig;
1766        ++zExp;
1767    }
1768 roundAndPack:
1769    return roundAndPackFloat32( zSign, zExp, zSig );
1770
1771}
1772
1773/*
1774-------------------------------------------------------------------------------
1775Returns the result of subtracting the absolute values of the single-
1776precision floating-point values `a' and `b'.  If `zSign' is 1, the
1777difference is negated before being returned.  `zSign' is ignored if the
1778result is a NaN.  The subtraction is performed according to the IEC/IEEE
1779Standard for Binary Floating-Point Arithmetic.
1780-------------------------------------------------------------------------------
1781*/
1782static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1783{
1784    int16 aExp, bExp, zExp;
1785    bits32 aSig, bSig, zSig;
1786    int16 expDiff;
1787
1788    aSig = extractFloat32Frac( a );
1789    aExp = extractFloat32Exp( a );
1790    bSig = extractFloat32Frac( b );
1791    bExp = extractFloat32Exp( b );
1792    expDiff = aExp - bExp;
1793    aSig <<= 7;
1794    bSig <<= 7;
1795    if ( 0 < expDiff ) goto aExpBigger;
1796    if ( expDiff < 0 ) goto bExpBigger;
1797    if ( aExp == 0xFF ) {
1798        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1799        float_raise( float_flag_invalid );
1800        return float32_default_nan;
1801    }
1802    if ( aExp == 0 ) {
1803        aExp = 1;
1804        bExp = 1;
1805    }
1806    if ( bSig < aSig ) goto aBigger;
1807    if ( aSig < bSig ) goto bBigger;
1808    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
1809 bExpBigger:
1810    if ( bExp == 0xFF ) {
1811        if ( bSig ) return propagateFloat32NaN( a, b );
1812        return packFloat32( zSign ^ 1, 0xFF, 0 );
1813    }
1814    if ( aExp == 0 ) {
1815        ++expDiff;
1816    }
1817    else {
1818        aSig |= 0x40000000;
1819    }
1820    shift32RightJamming( aSig, - expDiff, &aSig );
1821    bSig |= 0x40000000;
1822 bBigger:
1823    zSig = bSig - aSig;
1824    zExp = bExp;
1825    zSign ^= 1;
1826    goto normalizeRoundAndPack;
1827 aExpBigger:
1828    if ( aExp == 0xFF ) {
1829        if ( aSig ) return propagateFloat32NaN( a, b );
1830        return a;
1831    }
1832    if ( bExp == 0 ) {
1833        --expDiff;
1834    }
1835    else {
1836        bSig |= 0x40000000;
1837    }
1838    shift32RightJamming( bSig, expDiff, &bSig );
1839    aSig |= 0x40000000;
1840 aBigger:
1841    zSig = aSig - bSig;
1842    zExp = aExp;
1843 normalizeRoundAndPack:
1844    --zExp;
1845    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1846
1847}
1848
1849/*
1850-------------------------------------------------------------------------------
1851Returns the result of adding the single-precision floating-point values `a'
1852and `b'.  The operation is performed according to the IEC/IEEE Standard for
1853Binary Floating-Point Arithmetic.
1854-------------------------------------------------------------------------------
1855*/
1856float32 float32_add( float32 a, float32 b )
1857{
1858    flag aSign, bSign;
1859
1860    aSign = extractFloat32Sign( a );
1861    bSign = extractFloat32Sign( b );
1862    if ( aSign == bSign ) {
1863        return addFloat32Sigs( a, b, aSign );
1864    }
1865    else {
1866        return subFloat32Sigs( a, b, aSign );
1867    }
1868
1869}
1870
1871/*
1872-------------------------------------------------------------------------------
1873Returns the result of subtracting the single-precision floating-point values
1874`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1875for Binary Floating-Point Arithmetic.
1876-------------------------------------------------------------------------------
1877*/
1878float32 float32_sub( float32 a, float32 b )
1879{
1880    flag aSign, bSign;
1881
1882    aSign = extractFloat32Sign( a );
1883    bSign = extractFloat32Sign( b );
1884    if ( aSign == bSign ) {
1885        return subFloat32Sigs( a, b, aSign );
1886    }
1887    else {
1888        return addFloat32Sigs( a, b, aSign );
1889    }
1890
1891}
1892
1893/*
1894-------------------------------------------------------------------------------
1895Returns the result of multiplying the single-precision floating-point values
1896`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1897for Binary Floating-Point Arithmetic.
1898-------------------------------------------------------------------------------
1899*/
1900float32 float32_mul( float32 a, float32 b )
1901{
1902    flag aSign, bSign, zSign;
1903    int16 aExp, bExp, zExp;
1904    bits32 aSig, bSig;
1905    bits64 zSig64;
1906    bits32 zSig;
1907
1908    aSig = extractFloat32Frac( a );
1909    aExp = extractFloat32Exp( a );
1910    aSign = extractFloat32Sign( a );
1911    bSig = extractFloat32Frac( b );
1912    bExp = extractFloat32Exp( b );
1913    bSign = extractFloat32Sign( b );
1914    zSign = aSign ^ bSign;
1915    if ( aExp == 0xFF ) {
1916        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1917            return propagateFloat32NaN( a, b );
1918        }
1919        if ( ( bExp | bSig ) == 0 ) {
1920            float_raise( float_flag_invalid );
1921            return float32_default_nan;
1922        }
1923        return packFloat32( zSign, 0xFF, 0 );
1924    }
1925    if ( bExp == 0xFF ) {
1926        if ( bSig ) return propagateFloat32NaN( a, b );
1927        if ( ( aExp | aSig ) == 0 ) {
1928            float_raise( float_flag_invalid );
1929            return float32_default_nan;
1930        }
1931        return packFloat32( zSign, 0xFF, 0 );
1932    }
1933    if ( aExp == 0 ) {
1934        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1935        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1936    }
1937    if ( bExp == 0 ) {
1938        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1939        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1940    }
1941    zExp = aExp + bExp - 0x7F;
1942    aSig = ( aSig | 0x00800000 )<<7;
1943    bSig = ( bSig | 0x00800000 )<<8;
1944    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1945    zSig = (bits32)zSig64;
1946    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1947        zSig <<= 1;
1948        --zExp;
1949    }
1950    return roundAndPackFloat32( zSign, zExp, zSig );
1951
1952}
1953
1954/*
1955-------------------------------------------------------------------------------
1956Returns the result of dividing the single-precision floating-point value `a'
1957by the corresponding value `b'.  The operation is performed according to the
1958IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1959-------------------------------------------------------------------------------
1960*/
1961float32 float32_div( float32 a, float32 b )
1962{
1963    flag aSign, bSign, zSign;
1964    int16 aExp, bExp, zExp;
1965    bits32 aSig, bSig, zSig;
1966
1967    aSig = extractFloat32Frac( a );
1968    aExp = extractFloat32Exp( a );
1969    aSign = extractFloat32Sign( a );
1970    bSig = extractFloat32Frac( b );
1971    bExp = extractFloat32Exp( b );
1972    bSign = extractFloat32Sign( b );
1973    zSign = aSign ^ bSign;
1974    if ( aExp == 0xFF ) {
1975        if ( aSig ) return propagateFloat32NaN( a, b );
1976        if ( bExp == 0xFF ) {
1977            if ( bSig ) return propagateFloat32NaN( a, b );
1978            float_raise( float_flag_invalid );
1979            return float32_default_nan;
1980        }
1981        return packFloat32( zSign, 0xFF, 0 );
1982    }
1983    if ( bExp == 0xFF ) {
1984        if ( bSig ) return propagateFloat32NaN( a, b );
1985        return packFloat32( zSign, 0, 0 );
1986    }
1987    if ( bExp == 0 ) {
1988        if ( bSig == 0 ) {
1989            if ( ( aExp | aSig ) == 0 ) {
1990                float_raise( float_flag_invalid );
1991                return float32_default_nan;
1992            }
1993            float_raise( float_flag_divbyzero );
1994            return packFloat32( zSign, 0xFF, 0 );
1995        }
1996        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1997    }
1998    if ( aExp == 0 ) {
1999        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2000        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2001    }
2002    zExp = aExp - bExp + 0x7D;
2003    aSig = ( aSig | 0x00800000 )<<7;
2004    bSig = ( bSig | 0x00800000 )<<8;
2005    if ( bSig <= ( aSig + aSig ) ) {
2006        aSig >>= 1;
2007        ++zExp;
2008    }
2009    zSig = (bits32)((((bits64) aSig) << 32) / bSig);
2010    if ( ( zSig & 0x3F ) == 0 ) {
2011        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
2012    }
2013    return roundAndPackFloat32( zSign, zExp, zSig );
2014
2015}
2016
2017#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2018/*
2019-------------------------------------------------------------------------------
2020Returns the remainder of the single-precision floating-point value `a'
2021with respect to the corresponding value `b'.  The operation is performed
2022according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2023-------------------------------------------------------------------------------
2024*/
2025float32 float32_rem( float32 a, float32 b )
2026{
2027    flag aSign, bSign, zSign;
2028    int16 aExp, bExp, expDiff;
2029    bits32 aSig, bSig;
2030    bits32 q;
2031    bits64 aSig64, bSig64, q64;
2032    bits32 alternateASig;
2033    sbits32 sigMean;
2034
2035    aSig = extractFloat32Frac( a );
2036    aExp = extractFloat32Exp( a );
2037    aSign = extractFloat32Sign( a );
2038    bSig = extractFloat32Frac( b );
2039    bExp = extractFloat32Exp( b );
2040    bSign = extractFloat32Sign( b );
2041    if ( aExp == 0xFF ) {
2042        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2043            return propagateFloat32NaN( a, b );
2044        }
2045        float_raise( float_flag_invalid );
2046        return float32_default_nan;
2047    }
2048    if ( bExp == 0xFF ) {
2049        if ( bSig ) return propagateFloat32NaN( a, b );
2050        return a;
2051    }
2052    if ( bExp == 0 ) {
2053        if ( bSig == 0 ) {
2054            float_raise( float_flag_invalid );
2055            return float32_default_nan;
2056        }
2057        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2058    }
2059    if ( aExp == 0 ) {
2060        if ( aSig == 0 ) return a;
2061        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2062    }
2063    expDiff = aExp - bExp;
2064    aSig |= 0x00800000;
2065    bSig |= 0x00800000;
2066    if ( expDiff < 32 ) {
2067        aSig <<= 8;
2068        bSig <<= 8;
2069        if ( expDiff < 0 ) {
2070            if ( expDiff < -1 ) return a;
2071            aSig >>= 1;
2072        }
2073        q = ( bSig <= aSig );
2074        if ( q ) aSig -= bSig;
2075        if ( 0 < expDiff ) {
2076            q = ( ( (bits64) aSig )<<32 ) / bSig;
2077            q >>= 32 - expDiff;
2078            bSig >>= 2;
2079            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2080        }
2081        else {
2082            aSig >>= 2;
2083            bSig >>= 2;
2084        }
2085    }
2086    else {
2087        if ( bSig <= aSig ) aSig -= bSig;
2088        aSig64 = ( (bits64) aSig )<<40;
2089        bSig64 = ( (bits64) bSig )<<40;
2090        expDiff -= 64;
2091        while ( 0 < expDiff ) {
2092            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2093            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2094            aSig64 = - ( ( bSig * q64 )<<38 );
2095            expDiff -= 62;
2096        }
2097        expDiff += 64;
2098        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2099        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2100        q = q64>>( 64 - expDiff );
2101        bSig <<= 6;
2102        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2103    }
2104    do {
2105        alternateASig = aSig;
2106        ++q;
2107        aSig -= bSig;
2108    } while ( 0 <= (sbits32) aSig );
2109    sigMean = aSig + alternateASig;
2110    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2111        aSig = alternateASig;
2112    }
2113    zSign = ( (sbits32) aSig < 0 );
2114    if ( zSign ) aSig = - aSig;
2115    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2116
2117}
2118#endif /* !SOFTFLOAT_FOR_GCC */
2119
2120#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2121/*
2122-------------------------------------------------------------------------------
2123Returns the square root of the single-precision floating-point value `a'.
2124The operation is performed according to the IEC/IEEE Standard for Binary
2125Floating-Point Arithmetic.
2126-------------------------------------------------------------------------------
2127*/
2128float32 float32_sqrt( float32 a )
2129{
2130    flag aSign;
2131    int16 aExp, zExp;
2132    bits32 aSig, zSig;
2133    bits64 rem, term;
2134
2135    aSig = extractFloat32Frac( a );
2136    aExp = extractFloat32Exp( a );
2137    aSign = extractFloat32Sign( a );
2138    if ( aExp == 0xFF ) {
2139        if ( aSig ) return propagateFloat32NaN( a, 0 );
2140        if ( ! aSign ) return a;
2141        float_raise( float_flag_invalid );
2142        return float32_default_nan;
2143    }
2144    if ( aSign ) {
2145        if ( ( aExp | aSig ) == 0 ) return a;
2146        float_raise( float_flag_invalid );
2147        return float32_default_nan;
2148    }
2149    if ( aExp == 0 ) {
2150        if ( aSig == 0 ) return 0;
2151        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2152    }
2153    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2154    aSig = ( aSig | 0x00800000 )<<8;
2155    zSig = estimateSqrt32( aExp, aSig ) + 2;
2156    if ( ( zSig & 0x7F ) <= 5 ) {
2157        if ( zSig < 2 ) {
2158            zSig = 0x7FFFFFFF;
2159            goto roundAndPack;
2160        }
2161        aSig >>= aExp & 1;
2162        term = ( (bits64) zSig ) * zSig;
2163        rem = ( ( (bits64) aSig )<<32 ) - term;
2164        while ( (sbits64) rem < 0 ) {
2165            --zSig;
2166            rem += ( ( (bits64) zSig )<<1 ) | 1;
2167        }
2168        zSig |= ( rem != 0 );
2169    }
2170    shift32RightJamming( zSig, 1, &zSig );
2171 roundAndPack:
2172    return roundAndPackFloat32( 0, zExp, zSig );
2173
2174}
2175#endif /* !SOFTFLOAT_FOR_GCC */
2176
2177/*
2178-------------------------------------------------------------------------------
2179Returns 1 if the single-precision floating-point value `a' is equal to
2180the corresponding value `b', and 0 otherwise.  The comparison is performed
2181according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2182-------------------------------------------------------------------------------
2183*/
2184flag float32_eq( float32 a, float32 b )
2185{
2186
2187    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2188         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2189       ) {
2190        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2191            float_raise( float_flag_invalid );
2192        }
2193        return 0;
2194    }
2195    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2196
2197}
2198
2199/*
2200-------------------------------------------------------------------------------
2201Returns 1 if the single-precision floating-point value `a' is less than
2202or equal to the corresponding value `b', and 0 otherwise.  The comparison
2203is performed according to the IEC/IEEE Standard for Binary Floating-Point
2204Arithmetic.
2205-------------------------------------------------------------------------------
2206*/
2207flag float32_le( float32 a, float32 b )
2208{
2209    flag aSign, bSign;
2210
2211    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2212         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2213       ) {
2214        float_raise( float_flag_invalid );
2215        return 0;
2216    }
2217    aSign = extractFloat32Sign( a );
2218    bSign = extractFloat32Sign( b );
2219    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2220    return ( a == b ) || ( aSign ^ ( a < b ) );
2221
2222}
2223
2224/*
2225-------------------------------------------------------------------------------
2226Returns 1 if the single-precision floating-point value `a' is less than
2227the corresponding value `b', and 0 otherwise.  The comparison is performed
2228according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2229-------------------------------------------------------------------------------
2230*/
2231flag float32_lt( float32 a, float32 b )
2232{
2233    flag aSign, bSign;
2234
2235    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2236         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2237       ) {
2238        float_raise( float_flag_invalid );
2239        return 0;
2240    }
2241    aSign = extractFloat32Sign( a );
2242    bSign = extractFloat32Sign( b );
2243    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2244    return ( a != b ) && ( aSign ^ ( a < b ) );
2245
2246}
2247
2248#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2249/*
2250-------------------------------------------------------------------------------
2251Returns 1 if the single-precision floating-point value `a' is equal to
2252the corresponding value `b', and 0 otherwise.  The invalid exception is
2253raised if either operand is a NaN.  Otherwise, the comparison is performed
2254according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2255-------------------------------------------------------------------------------
2256*/
2257flag float32_eq_signaling( float32 a, float32 b )
2258{
2259
2260    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2261         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2262       ) {
2263        float_raise( float_flag_invalid );
2264        return 0;
2265    }
2266    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2267
2268}
2269
2270/*
2271-------------------------------------------------------------------------------
2272Returns 1 if the single-precision floating-point value `a' is less than or
2273equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2274cause an exception.  Otherwise, the comparison is performed according to the
2275IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2276-------------------------------------------------------------------------------
2277*/
2278flag float32_le_quiet( float32 a, float32 b )
2279{
2280    flag aSign, bSign;
2281
2282    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2283         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2284       ) {
2285        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2286            float_raise( float_flag_invalid );
2287        }
2288        return 0;
2289    }
2290    aSign = extractFloat32Sign( a );
2291    bSign = extractFloat32Sign( b );
2292    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2293    return ( a == b ) || ( aSign ^ ( a < b ) );
2294
2295}
2296
2297/*
2298-------------------------------------------------------------------------------
2299Returns 1 if the single-precision floating-point value `a' is less than
2300the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2301exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2302Standard for Binary Floating-Point Arithmetic.
2303-------------------------------------------------------------------------------
2304*/
2305flag float32_lt_quiet( float32 a, float32 b )
2306{
2307    flag aSign, bSign;
2308
2309    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2310         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2311       ) {
2312        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2313            float_raise( float_flag_invalid );
2314        }
2315        return 0;
2316    }
2317    aSign = extractFloat32Sign( a );
2318    bSign = extractFloat32Sign( b );
2319    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2320    return ( a != b ) && ( aSign ^ ( a < b ) );
2321
2322}
2323#endif /* !SOFTFLOAT_FOR_GCC */
2324
2325#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2326/*
2327-------------------------------------------------------------------------------
2328Returns the result of converting the double-precision floating-point value
2329`a' to the 32-bit two's complement integer format.  The conversion is
2330performed according to the IEC/IEEE Standard for Binary Floating-Point
2331Arithmetic---which means in particular that the conversion is rounded
2332according to the current rounding mode.  If `a' is a NaN, the largest
2333positive integer is returned.  Otherwise, if the conversion overflows, the
2334largest integer with the same sign as `a' is returned.
2335-------------------------------------------------------------------------------
2336*/
2337int32 float64_to_int32( float64 a )
2338{
2339    flag aSign;
2340    int16 aExp, shiftCount;
2341    bits64 aSig;
2342
2343    aSig = extractFloat64Frac( a );
2344    aExp = extractFloat64Exp( a );
2345    aSign = extractFloat64Sign( a );
2346    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2347    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2348    shiftCount = 0x42C - aExp;
2349    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2350    return roundAndPackInt32( aSign, aSig );
2351
2352}
2353#endif /* !SOFTFLOAT_FOR_GCC */
2354
2355/*
2356-------------------------------------------------------------------------------
2357Returns the result of converting the double-precision floating-point value
2358`a' to the 32-bit two's complement integer format.  The conversion is
2359performed according to the IEC/IEEE Standard for Binary Floating-Point
2360Arithmetic, except that the conversion is always rounded toward zero.
2361If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2362the conversion overflows, the largest integer with the same sign as `a' is
2363returned.
2364-------------------------------------------------------------------------------
2365*/
2366int32 float64_to_int32_round_to_zero( float64 a )
2367{
2368    flag aSign;
2369    int16 aExp, shiftCount;
2370    bits64 aSig, savedASig;
2371    int32 z;
2372
2373    aSig = extractFloat64Frac( a );
2374    aExp = extractFloat64Exp( a );
2375    aSign = extractFloat64Sign( a );
2376    if ( 0x41E < aExp ) {
2377        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2378        goto invalid;
2379    }
2380    else if ( aExp < 0x3FF ) {
2381        if ( aExp || aSig ) set_float_exception_inexact_flag();
2382        return 0;
2383    }
2384    aSig |= LIT64( 0x0010000000000000 );
2385    shiftCount = 0x433 - aExp;
2386    savedASig = aSig;
2387    aSig >>= shiftCount;
2388    z = (int32)aSig;
2389    if ( aSign ) z = - z;
2390    if ( ( z < 0 ) ^ aSign ) {
2391 invalid:
2392        float_raise( float_flag_invalid );
2393        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2394    }
2395    if ( ( aSig<<shiftCount ) != savedASig ) {
2396        set_float_exception_inexact_flag();
2397    }
2398    return z;
2399
2400}
2401
2402#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2403/*
2404-------------------------------------------------------------------------------
2405Returns the result of converting the double-precision floating-point value
2406`a' to the 64-bit two's complement integer format.  The conversion is
2407performed according to the IEC/IEEE Standard for Binary Floating-Point
2408Arithmetic---which means in particular that the conversion is rounded
2409according to the current rounding mode.  If `a' is a NaN, the largest
2410positive integer is returned.  Otherwise, if the conversion overflows, the
2411largest integer with the same sign as `a' is returned.
2412-------------------------------------------------------------------------------
2413*/
2414int64 float64_to_int64( float64 a )
2415{
2416    flag aSign;
2417    int16 aExp, shiftCount;
2418    bits64 aSig, aSigExtra;
2419
2420    aSig = extractFloat64Frac( a );
2421    aExp = extractFloat64Exp( a );
2422    aSign = extractFloat64Sign( a );
2423    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2424    shiftCount = 0x433 - aExp;
2425    if ( shiftCount <= 0 ) {
2426        if ( 0x43E < aExp ) {
2427            float_raise( float_flag_invalid );
2428            if (    ! aSign
2429                 || (    ( aExp == 0x7FF )
2430                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2431               ) {
2432                return LIT64( 0x7FFFFFFFFFFFFFFF );
2433            }
2434            return (sbits64) LIT64( 0x8000000000000000 );
2435        }
2436        aSigExtra = 0;
2437        aSig <<= - shiftCount;
2438    }
2439    else {
2440        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2441    }
2442    return roundAndPackInt64( aSign, aSig, aSigExtra );
2443
2444}
2445
2446/*
2447-------------------------------------------------------------------------------
2448Returns the result of converting the double-precision floating-point value
2449`a' to the 64-bit two's complement integer format.  The conversion is
2450performed according to the IEC/IEEE Standard for Binary Floating-Point
2451Arithmetic, except that the conversion is always rounded toward zero.
2452If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2453the conversion overflows, the largest integer with the same sign as `a' is
2454returned.
2455-------------------------------------------------------------------------------
2456*/
2457int64 float64_to_int64_round_to_zero( float64 a )
2458{
2459    flag aSign;
2460    int16 aExp, shiftCount;
2461    bits64 aSig;
2462    int64 z;
2463
2464    aSig = extractFloat64Frac( a );
2465    aExp = extractFloat64Exp( a );
2466    aSign = extractFloat64Sign( a );
2467    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2468    shiftCount = aExp - 0x433;
2469    if ( 0 <= shiftCount ) {
2470        if ( 0x43E <= aExp ) {
2471            if ( a != LIT64( 0xC3E0000000000000 ) ) {
2472                float_raise( float_flag_invalid );
2473                if (    ! aSign
2474                     || (    ( aExp == 0x7FF )
2475                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2476                   ) {
2477                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2478                }
2479            }
2480            return (sbits64) LIT64( 0x8000000000000000 );
2481        }
2482        z = aSig<<shiftCount;
2483    }
2484    else {
2485        if ( aExp < 0x3FE ) {
2486            if ( aExp | aSig ) set_float_exception_inexact_flag();
2487            return 0;
2488        }
2489        z = aSig>>( - shiftCount );
2490        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2491            set_float_exception_inexact_flag();
2492        }
2493    }
2494    if ( aSign ) z = - z;
2495    return z;
2496
2497}
2498#endif /* !SOFTFLOAT_FOR_GCC */
2499
2500/*
2501-------------------------------------------------------------------------------
2502Returns the result of converting the double-precision floating-point value
2503`a' to the single-precision floating-point format.  The conversion is
2504performed according to the IEC/IEEE Standard for Binary Floating-Point
2505Arithmetic.
2506-------------------------------------------------------------------------------
2507*/
2508float32 float64_to_float32( float64 a )
2509{
2510    flag aSign;
2511    int16 aExp;
2512    bits64 aSig;
2513    bits32 zSig;
2514
2515    aSig = extractFloat64Frac( a );
2516    aExp = extractFloat64Exp( a );
2517    aSign = extractFloat64Sign( a );
2518    if ( aExp == 0x7FF ) {
2519        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2520        return packFloat32( aSign, 0xFF, 0 );
2521    }
2522    shift64RightJamming( aSig, 22, &aSig );
2523    zSig = (bits32)aSig;
2524    if ( aExp || zSig ) {
2525        zSig |= 0x40000000;
2526        aExp -= 0x381;
2527    }
2528    return roundAndPackFloat32( aSign, aExp, zSig );
2529
2530}
2531
2532#ifdef FLOATX80
2533
2534/*
2535-------------------------------------------------------------------------------
2536Returns the result of converting the double-precision floating-point value
2537`a' to the extended double-precision floating-point format.  The conversion
2538is performed according to the IEC/IEEE Standard for Binary Floating-Point
2539Arithmetic.
2540-------------------------------------------------------------------------------
2541*/
2542floatx80 float64_to_floatx80( float64 a )
2543{
2544    flag aSign;
2545    int16 aExp;
2546    bits64 aSig;
2547
2548    aSig = extractFloat64Frac( a );
2549    aExp = extractFloat64Exp( a );
2550    aSign = extractFloat64Sign( a );
2551    if ( aExp == 0x7FF ) {
2552        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2553        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2554    }
2555    if ( aExp == 0 ) {
2556        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2557        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2558    }
2559    return
2560        packFloatx80(
2561            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2562
2563}
2564
2565#endif
2566
2567#ifdef FLOAT128
2568
2569/*
2570-------------------------------------------------------------------------------
2571Returns the result of converting the double-precision floating-point value
2572`a' to the quadruple-precision floating-point format.  The conversion is
2573performed according to the IEC/IEEE Standard for Binary Floating-Point
2574Arithmetic.
2575-------------------------------------------------------------------------------
2576*/
2577float128 float64_to_float128( float64 a )
2578{
2579    flag aSign;
2580    int16 aExp;
2581    bits64 aSig, zSig0, zSig1;
2582
2583    aSig = extractFloat64Frac( a );
2584    aExp = extractFloat64Exp( a );
2585    aSign = extractFloat64Sign( a );
2586    if ( aExp == 0x7FF ) {
2587        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
2588        return packFloat128( aSign, 0x7FFF, 0, 0 );
2589    }
2590    if ( aExp == 0 ) {
2591        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2592        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2593        --aExp;
2594    }
2595    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2596    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2597
2598}
2599
2600#endif
2601
2602#ifndef SOFTFLOAT_FOR_GCC
2603/*
2604-------------------------------------------------------------------------------
2605Rounds the double-precision floating-point value `a' to an integer, and
2606returns the result as a double-precision floating-point value.  The
2607operation is performed according to the IEC/IEEE Standard for Binary
2608Floating-Point Arithmetic.
2609-------------------------------------------------------------------------------
2610*/
2611float64 float64_round_to_int( float64 a )
2612{
2613    flag aSign;
2614    int16 aExp;
2615    bits64 lastBitMask, roundBitsMask;
2616    int8 roundingMode;
2617    float64 z;
2618
2619    aExp = extractFloat64Exp( a );
2620    if ( 0x433 <= aExp ) {
2621        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2622            return propagateFloat64NaN( a, a );
2623        }
2624        return a;
2625    }
2626    if ( aExp < 0x3FF ) {
2627        if ( (bits64) ( a<<1 ) == 0 ) return a;
2628        set_float_exception_inexact_flag();
2629        aSign = extractFloat64Sign( a );
2630        switch ( float_rounding_mode ) {
2631         case float_round_nearest_even:
2632            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2633                return packFloat64( aSign, 0x3FF, 0 );
2634            }
2635            break;
2636	 case float_round_to_zero:
2637	    break;
2638         case float_round_down:
2639            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2640         case float_round_up:
2641            return
2642            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2643        }
2644        return packFloat64( aSign, 0, 0 );
2645    }
2646    lastBitMask = 1;
2647    lastBitMask <<= 0x433 - aExp;
2648    roundBitsMask = lastBitMask - 1;
2649    z = a;
2650    roundingMode = float_rounding_mode;
2651    if ( roundingMode == float_round_nearest_even ) {
2652        z += lastBitMask>>1;
2653        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2654    }
2655    else if ( roundingMode != float_round_to_zero ) {
2656        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2657            z += roundBitsMask;
2658        }
2659    }
2660    z &= ~ roundBitsMask;
2661    if ( z != a ) set_float_exception_inexact_flag();
2662    return z;
2663
2664}
2665#endif
2666
2667/*
2668-------------------------------------------------------------------------------
2669Returns the result of adding the absolute values of the double-precision
2670floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2671before being returned.  `zSign' is ignored if the result is a NaN.
2672The addition is performed according to the IEC/IEEE Standard for Binary
2673Floating-Point Arithmetic.
2674-------------------------------------------------------------------------------
2675*/
2676static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2677{
2678    int16 aExp, bExp, zExp;
2679    bits64 aSig, bSig, zSig;
2680    int16 expDiff;
2681
2682    aSig = extractFloat64Frac( a );
2683    aExp = extractFloat64Exp( a );
2684    bSig = extractFloat64Frac( b );
2685    bExp = extractFloat64Exp( b );
2686    expDiff = aExp - bExp;
2687    aSig <<= 9;
2688    bSig <<= 9;
2689    if ( 0 < expDiff ) {
2690        if ( aExp == 0x7FF ) {
2691            if ( aSig ) return propagateFloat64NaN( a, b );
2692            return a;
2693        }
2694        if ( bExp == 0 ) {
2695            --expDiff;
2696        }
2697        else {
2698            bSig |= LIT64( 0x2000000000000000 );
2699        }
2700        shift64RightJamming( bSig, expDiff, &bSig );
2701        zExp = aExp;
2702    }
2703    else if ( expDiff < 0 ) {
2704        if ( bExp == 0x7FF ) {
2705            if ( bSig ) return propagateFloat64NaN( a, b );
2706            return packFloat64( zSign, 0x7FF, 0 );
2707        }
2708        if ( aExp == 0 ) {
2709            ++expDiff;
2710        }
2711        else {
2712            aSig |= LIT64( 0x2000000000000000 );
2713        }
2714        shift64RightJamming( aSig, - expDiff, &aSig );
2715        zExp = bExp;
2716    }
2717    else {
2718        if ( aExp == 0x7FF ) {
2719            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2720            return a;
2721        }
2722        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2723        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2724        zExp = aExp;
2725        goto roundAndPack;
2726    }
2727    aSig |= LIT64( 0x2000000000000000 );
2728    zSig = ( aSig + bSig )<<1;
2729    --zExp;
2730    if ( (sbits64) zSig < 0 ) {
2731        zSig = aSig + bSig;
2732        ++zExp;
2733    }
2734 roundAndPack:
2735    return roundAndPackFloat64( zSign, zExp, zSig );
2736
2737}
2738
2739/*
2740-------------------------------------------------------------------------------
2741Returns the result of subtracting the absolute values of the double-
2742precision floating-point values `a' and `b'.  If `zSign' is 1, the
2743difference is negated before being returned.  `zSign' is ignored if the
2744result is a NaN.  The subtraction is performed according to the IEC/IEEE
2745Standard for Binary Floating-Point Arithmetic.
2746-------------------------------------------------------------------------------
2747*/
2748static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2749{
2750    int16 aExp, bExp, zExp;
2751    bits64 aSig, bSig, zSig;
2752    int16 expDiff;
2753
2754    aSig = extractFloat64Frac( a );
2755    aExp = extractFloat64Exp( a );
2756    bSig = extractFloat64Frac( b );
2757    bExp = extractFloat64Exp( b );
2758    expDiff = aExp - bExp;
2759    aSig <<= 10;
2760    bSig <<= 10;
2761    if ( 0 < expDiff ) goto aExpBigger;
2762    if ( expDiff < 0 ) goto bExpBigger;
2763    if ( aExp == 0x7FF ) {
2764        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2765        float_raise( float_flag_invalid );
2766        return float64_default_nan;
2767    }
2768    if ( aExp == 0 ) {
2769        aExp = 1;
2770        bExp = 1;
2771    }
2772    if ( bSig < aSig ) goto aBigger;
2773    if ( aSig < bSig ) goto bBigger;
2774    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
2775 bExpBigger:
2776    if ( bExp == 0x7FF ) {
2777        if ( bSig ) return propagateFloat64NaN( a, b );
2778        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2779    }
2780    if ( aExp == 0 ) {
2781        ++expDiff;
2782    }
2783    else {
2784        aSig |= LIT64( 0x4000000000000000 );
2785    }
2786    shift64RightJamming( aSig, - expDiff, &aSig );
2787    bSig |= LIT64( 0x4000000000000000 );
2788 bBigger:
2789    zSig = bSig - aSig;
2790    zExp = bExp;
2791    zSign ^= 1;
2792    goto normalizeRoundAndPack;
2793 aExpBigger:
2794    if ( aExp == 0x7FF ) {
2795        if ( aSig ) return propagateFloat64NaN( a, b );
2796        return a;
2797    }
2798    if ( bExp == 0 ) {
2799        --expDiff;
2800    }
2801    else {
2802        bSig |= LIT64( 0x4000000000000000 );
2803    }
2804    shift64RightJamming( bSig, expDiff, &bSig );
2805    aSig |= LIT64( 0x4000000000000000 );
2806 aBigger:
2807    zSig = aSig - bSig;
2808    zExp = aExp;
2809 normalizeRoundAndPack:
2810    --zExp;
2811    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2812
2813}
2814
2815/*
2816-------------------------------------------------------------------------------
2817Returns the result of adding the double-precision floating-point values `a'
2818and `b'.  The operation is performed according to the IEC/IEEE Standard for
2819Binary Floating-Point Arithmetic.
2820-------------------------------------------------------------------------------
2821*/
2822float64 float64_add( float64 a, float64 b )
2823{
2824    flag aSign, bSign;
2825
2826    aSign = extractFloat64Sign( a );
2827    bSign = extractFloat64Sign( b );
2828    if ( aSign == bSign ) {
2829        return addFloat64Sigs( a, b, aSign );
2830    }
2831    else {
2832        return subFloat64Sigs( a, b, aSign );
2833    }
2834
2835}
2836
2837/*
2838-------------------------------------------------------------------------------
2839Returns the result of subtracting the double-precision floating-point values
2840`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2841for Binary Floating-Point Arithmetic.
2842-------------------------------------------------------------------------------
2843*/
2844float64 float64_sub( float64 a, float64 b )
2845{
2846    flag aSign, bSign;
2847
2848    aSign = extractFloat64Sign( a );
2849    bSign = extractFloat64Sign( b );
2850    if ( aSign == bSign ) {
2851        return subFloat64Sigs( a, b, aSign );
2852    }
2853    else {
2854        return addFloat64Sigs( a, b, aSign );
2855    }
2856
2857}
2858
2859/*
2860-------------------------------------------------------------------------------
2861Returns the result of multiplying the double-precision floating-point values
2862`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2863for Binary Floating-Point Arithmetic.
2864-------------------------------------------------------------------------------
2865*/
2866float64 float64_mul( float64 a, float64 b )
2867{
2868    flag aSign, bSign, zSign;
2869    int16 aExp, bExp, zExp;
2870    bits64 aSig, bSig, zSig0, zSig1;
2871
2872    aSig = extractFloat64Frac( a );
2873    aExp = extractFloat64Exp( a );
2874    aSign = extractFloat64Sign( a );
2875    bSig = extractFloat64Frac( b );
2876    bExp = extractFloat64Exp( b );
2877    bSign = extractFloat64Sign( b );
2878    zSign = aSign ^ bSign;
2879    if ( aExp == 0x7FF ) {
2880        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2881            return propagateFloat64NaN( a, b );
2882        }
2883        if ( ( bExp | bSig ) == 0 ) {
2884            float_raise( float_flag_invalid );
2885            return float64_default_nan;
2886        }
2887        return packFloat64( zSign, 0x7FF, 0 );
2888    }
2889    if ( bExp == 0x7FF ) {
2890        if ( bSig ) return propagateFloat64NaN( a, b );
2891        if ( ( aExp | aSig ) == 0 ) {
2892            float_raise( float_flag_invalid );
2893            return float64_default_nan;
2894        }
2895        return packFloat64( zSign, 0x7FF, 0 );
2896    }
2897    if ( aExp == 0 ) {
2898        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2899        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2900    }
2901    if ( bExp == 0 ) {
2902        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2903        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2904    }
2905    zExp = aExp + bExp - 0x3FF;
2906    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2907    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2908    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2909    zSig0 |= ( zSig1 != 0 );
2910    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2911        zSig0 <<= 1;
2912        --zExp;
2913    }
2914    return roundAndPackFloat64( zSign, zExp, zSig0 );
2915
2916}
2917
2918/*
2919-------------------------------------------------------------------------------
2920Returns the result of dividing the double-precision floating-point value `a'
2921by the corresponding value `b'.  The operation is performed according to
2922the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2923-------------------------------------------------------------------------------
2924*/
2925float64 float64_div( float64 a, float64 b )
2926{
2927    flag aSign, bSign, zSign;
2928    int16 aExp, bExp, zExp;
2929    bits64 aSig, bSig, zSig;
2930    bits64 rem0, rem1;
2931    bits64 term0, term1;
2932
2933    aSig = extractFloat64Frac( a );
2934    aExp = extractFloat64Exp( a );
2935    aSign = extractFloat64Sign( a );
2936    bSig = extractFloat64Frac( b );
2937    bExp = extractFloat64Exp( b );
2938    bSign = extractFloat64Sign( b );
2939    zSign = aSign ^ bSign;
2940    if ( aExp == 0x7FF ) {
2941        if ( aSig ) return propagateFloat64NaN( a, b );
2942        if ( bExp == 0x7FF ) {
2943            if ( bSig ) return propagateFloat64NaN( a, b );
2944            float_raise( float_flag_invalid );
2945            return float64_default_nan;
2946        }
2947        return packFloat64( zSign, 0x7FF, 0 );
2948    }
2949    if ( bExp == 0x7FF ) {
2950        if ( bSig ) return propagateFloat64NaN( a, b );
2951        return packFloat64( zSign, 0, 0 );
2952    }
2953    if ( bExp == 0 ) {
2954        if ( bSig == 0 ) {
2955            if ( ( aExp | aSig ) == 0 ) {
2956                float_raise( float_flag_invalid );
2957                return float64_default_nan;
2958            }
2959            float_raise( float_flag_divbyzero );
2960            return packFloat64( zSign, 0x7FF, 0 );
2961        }
2962        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2963    }
2964    if ( aExp == 0 ) {
2965        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2966        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2967    }
2968    zExp = aExp - bExp + 0x3FD;
2969    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2970    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2971    if ( bSig <= ( aSig + aSig ) ) {
2972        aSig >>= 1;
2973        ++zExp;
2974    }
2975    zSig = estimateDiv128To64( aSig, 0, bSig );
2976    if ( ( zSig & 0x1FF ) <= 2 ) {
2977        mul64To128( bSig, zSig, &term0, &term1 );
2978        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2979        while ( (sbits64) rem0 < 0 ) {
2980            --zSig;
2981            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2982        }
2983        zSig |= ( rem1 != 0 );
2984    }
2985    return roundAndPackFloat64( zSign, zExp, zSig );
2986
2987}
2988
2989#ifndef SOFTFLOAT_FOR_GCC
2990/*
2991-------------------------------------------------------------------------------
2992Returns the remainder of the double-precision floating-point value `a'
2993with respect to the corresponding value `b'.  The operation is performed
2994according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2995-------------------------------------------------------------------------------
2996*/
2997float64 float64_rem( float64 a, float64 b )
2998{
2999    flag aSign, bSign, zSign;
3000    int16 aExp, bExp, expDiff;
3001    bits64 aSig, bSig;
3002    bits64 q, alternateASig;
3003    sbits64 sigMean;
3004
3005    aSig = extractFloat64Frac( a );
3006    aExp = extractFloat64Exp( a );
3007    aSign = extractFloat64Sign( a );
3008    bSig = extractFloat64Frac( b );
3009    bExp = extractFloat64Exp( b );
3010    bSign = extractFloat64Sign( b );
3011    if ( aExp == 0x7FF ) {
3012        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3013            return propagateFloat64NaN( a, b );
3014        }
3015        float_raise( float_flag_invalid );
3016        return float64_default_nan;
3017    }
3018    if ( bExp == 0x7FF ) {
3019        if ( bSig ) return propagateFloat64NaN( a, b );
3020        return a;
3021    }
3022    if ( bExp == 0 ) {
3023        if ( bSig == 0 ) {
3024            float_raise( float_flag_invalid );
3025            return float64_default_nan;
3026        }
3027        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3028    }
3029    if ( aExp == 0 ) {
3030        if ( aSig == 0 ) return a;
3031        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3032    }
3033    expDiff = aExp - bExp;
3034    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3035    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3036    if ( expDiff < 0 ) {
3037        if ( expDiff < -1 ) return a;
3038        aSig >>= 1;
3039    }
3040    q = ( bSig <= aSig );
3041    if ( q ) aSig -= bSig;
3042    expDiff -= 64;
3043    while ( 0 < expDiff ) {
3044        q = estimateDiv128To64( aSig, 0, bSig );
3045        q = ( 2 < q ) ? q - 2 : 0;
3046        aSig = - ( ( bSig>>2 ) * q );
3047        expDiff -= 62;
3048    }
3049    expDiff += 64;
3050    if ( 0 < expDiff ) {
3051        q = estimateDiv128To64( aSig, 0, bSig );
3052        q = ( 2 < q ) ? q - 2 : 0;
3053        q >>= 64 - expDiff;
3054        bSig >>= 2;
3055        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3056    }
3057    else {
3058        aSig >>= 2;
3059        bSig >>= 2;
3060    }
3061    do {
3062        alternateASig = aSig;
3063        ++q;
3064        aSig -= bSig;
3065    } while ( 0 <= (sbits64) aSig );
3066    sigMean = aSig + alternateASig;
3067    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3068        aSig = alternateASig;
3069    }
3070    zSign = ( (sbits64) aSig < 0 );
3071    if ( zSign ) aSig = - aSig;
3072    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3073
3074}
3075
3076/*
3077-------------------------------------------------------------------------------
3078Returns the square root of the double-precision floating-point value `a'.
3079The operation is performed according to the IEC/IEEE Standard for Binary
3080Floating-Point Arithmetic.
3081-------------------------------------------------------------------------------
3082*/
3083float64 float64_sqrt( float64 a )
3084{
3085    flag aSign;
3086    int16 aExp, zExp;
3087    bits64 aSig, zSig, doubleZSig;
3088    bits64 rem0, rem1, term0, term1;
3089
3090    aSig = extractFloat64Frac( a );
3091    aExp = extractFloat64Exp( a );
3092    aSign = extractFloat64Sign( a );
3093    if ( aExp == 0x7FF ) {
3094        if ( aSig ) return propagateFloat64NaN( a, a );
3095        if ( ! aSign ) return a;
3096        float_raise( float_flag_invalid );
3097        return float64_default_nan;
3098    }
3099    if ( aSign ) {
3100        if ( ( aExp | aSig ) == 0 ) return a;
3101        float_raise( float_flag_invalid );
3102        return float64_default_nan;
3103    }
3104    if ( aExp == 0 ) {
3105        if ( aSig == 0 ) return 0;
3106        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3107    }
3108    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3109    aSig |= LIT64( 0x0010000000000000 );
3110    zSig = estimateSqrt32( aExp, aSig>>21 );
3111    aSig <<= 9 - ( aExp & 1 );
3112    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3113    if ( ( zSig & 0x1FF ) <= 5 ) {
3114        doubleZSig = zSig<<1;
3115        mul64To128( zSig, zSig, &term0, &term1 );
3116        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3117        while ( (sbits64) rem0 < 0 ) {
3118            --zSig;
3119            doubleZSig -= 2;
3120            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3121        }
3122        zSig |= ( ( rem0 | rem1 ) != 0 );
3123    }
3124    return roundAndPackFloat64( 0, zExp, zSig );
3125
3126}
3127#endif
3128
3129/*
3130-------------------------------------------------------------------------------
3131Returns 1 if the double-precision floating-point value `a' is equal to the
3132corresponding value `b', and 0 otherwise.  The comparison is performed
3133according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3134-------------------------------------------------------------------------------
3135*/
3136flag float64_eq( float64 a, float64 b )
3137{
3138
3139    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3140         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3141       ) {
3142        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3143            float_raise( float_flag_invalid );
3144        }
3145        return 0;
3146    }
3147    return ( a == b ) ||
3148	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3149
3150}
3151
3152/*
3153-------------------------------------------------------------------------------
3154Returns 1 if the double-precision floating-point value `a' is less than or
3155equal to the corresponding value `b', and 0 otherwise.  The comparison is
3156performed according to the IEC/IEEE Standard for Binary Floating-Point
3157Arithmetic.
3158-------------------------------------------------------------------------------
3159*/
3160flag float64_le( float64 a, float64 b )
3161{
3162    flag aSign, bSign;
3163
3164    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3165         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3166       ) {
3167        float_raise( float_flag_invalid );
3168        return 0;
3169    }
3170    aSign = extractFloat64Sign( a );
3171    bSign = extractFloat64Sign( b );
3172    if ( aSign != bSign )
3173	return aSign ||
3174	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3175	      0 );
3176    return ( a == b ) ||
3177	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3178
3179}
3180
3181/*
3182-------------------------------------------------------------------------------
3183Returns 1 if the double-precision floating-point value `a' is less than
3184the corresponding value `b', and 0 otherwise.  The comparison is performed
3185according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3186-------------------------------------------------------------------------------
3187*/
3188flag float64_lt( float64 a, float64 b )
3189{
3190    flag aSign, bSign;
3191
3192    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3193         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3194       ) {
3195        float_raise( float_flag_invalid );
3196        return 0;
3197    }
3198    aSign = extractFloat64Sign( a );
3199    bSign = extractFloat64Sign( b );
3200    if ( aSign != bSign )
3201	return aSign &&
3202	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3203	      0 );
3204    return ( a != b ) &&
3205	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3206
3207}
3208
3209#ifndef SOFTFLOAT_FOR_GCC
3210/*
3211-------------------------------------------------------------------------------
3212Returns 1 if the double-precision floating-point value `a' is equal to the
3213corresponding value `b', and 0 otherwise.  The invalid exception is raised
3214if either operand is a NaN.  Otherwise, the comparison is performed
3215according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3216-------------------------------------------------------------------------------
3217*/
3218flag float64_eq_signaling( float64 a, float64 b )
3219{
3220
3221    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3222         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3223       ) {
3224        float_raise( float_flag_invalid );
3225        return 0;
3226    }
3227    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3228
3229}
3230
3231/*
3232-------------------------------------------------------------------------------
3233Returns 1 if the double-precision floating-point value `a' is less than or
3234equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3235cause an exception.  Otherwise, the comparison is performed according to the
3236IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3237-------------------------------------------------------------------------------
3238*/
3239flag float64_le_quiet( float64 a, float64 b )
3240{
3241    flag aSign, bSign;
3242
3243    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3244         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3245       ) {
3246        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3247            float_raise( float_flag_invalid );
3248        }
3249        return 0;
3250    }
3251    aSign = extractFloat64Sign( a );
3252    bSign = extractFloat64Sign( b );
3253    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3254    return ( a == b ) || ( aSign ^ ( a < b ) );
3255
3256}
3257
3258/*
3259-------------------------------------------------------------------------------
3260Returns 1 if the double-precision floating-point value `a' is less than
3261the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3262exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3263Standard for Binary Floating-Point Arithmetic.
3264-------------------------------------------------------------------------------
3265*/
3266flag float64_lt_quiet( float64 a, float64 b )
3267{
3268    flag aSign, bSign;
3269
3270    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3271         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3272       ) {
3273        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3274            float_raise( float_flag_invalid );
3275        }
3276        return 0;
3277    }
3278    aSign = extractFloat64Sign( a );
3279    bSign = extractFloat64Sign( b );
3280    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3281    return ( a != b ) && ( aSign ^ ( a < b ) );
3282
3283}
3284#endif
3285
3286#ifdef FLOATX80
3287
3288/*
3289-------------------------------------------------------------------------------
3290Returns the result of converting the extended double-precision floating-
3291point value `a' to the 32-bit two's complement integer format.  The
3292conversion is performed according to the IEC/IEEE Standard for Binary
3293Floating-Point Arithmetic---which means in particular that the conversion
3294is rounded according to the current rounding mode.  If `a' is a NaN, the
3295largest positive integer is returned.  Otherwise, if the conversion
3296overflows, the largest integer with the same sign as `a' is returned.
3297-------------------------------------------------------------------------------
3298*/
3299int32 floatx80_to_int32( floatx80 a )
3300{
3301    flag aSign;
3302    int32 aExp, shiftCount;
3303    bits64 aSig;
3304
3305    aSig = extractFloatx80Frac( a );
3306    aExp = extractFloatx80Exp( a );
3307    aSign = extractFloatx80Sign( a );
3308    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3309    shiftCount = 0x4037 - aExp;
3310    if ( shiftCount <= 0 ) shiftCount = 1;
3311    shift64RightJamming( aSig, shiftCount, &aSig );
3312    return roundAndPackInt32( aSign, aSig );
3313
3314}
3315
3316/*
3317-------------------------------------------------------------------------------
3318Returns the result of converting the extended double-precision floating-
3319point value `a' to the 32-bit two's complement integer format.  The
3320conversion is performed according to the IEC/IEEE Standard for Binary
3321Floating-Point Arithmetic, except that the conversion is always rounded
3322toward zero.  If `a' is a NaN, the largest positive integer is returned.
3323Otherwise, if the conversion overflows, the largest integer with the same
3324sign as `a' is returned.
3325-------------------------------------------------------------------------------
3326*/
3327int32 floatx80_to_int32_round_to_zero( floatx80 a )
3328{
3329    flag aSign;
3330    int32 aExp, shiftCount;
3331    bits64 aSig, savedASig;
3332    int32 z;
3333
3334    aSig = extractFloatx80Frac( a );
3335    aExp = extractFloatx80Exp( a );
3336    aSign = extractFloatx80Sign( a );
3337    if ( 0x401E < aExp ) {
3338        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3339        goto invalid;
3340    }
3341    else if ( aExp < 0x3FFF ) {
3342        if ( aExp || aSig ) set_float_exception_inexact_flag();
3343        return 0;
3344    }
3345    shiftCount = 0x403E - aExp;
3346    savedASig = aSig;
3347    aSig >>= shiftCount;
3348    z = aSig;
3349    if ( aSign ) z = - z;
3350    if ( ( z < 0 ) ^ aSign ) {
3351 invalid:
3352        float_raise( float_flag_invalid );
3353        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3354    }
3355    if ( ( aSig<<shiftCount ) != savedASig ) {
3356        set_float_exception_inexact_flag();
3357    }
3358    return z;
3359
3360}
3361
3362/*
3363-------------------------------------------------------------------------------
3364Returns the result of converting the extended double-precision floating-
3365point value `a' to the 64-bit two's complement integer format.  The
3366conversion is performed according to the IEC/IEEE Standard for Binary
3367Floating-Point Arithmetic---which means in particular that the conversion
3368is rounded according to the current rounding mode.  If `a' is a NaN,
3369the largest positive integer is returned.  Otherwise, if the conversion
3370overflows, the largest integer with the same sign as `a' is returned.
3371-------------------------------------------------------------------------------
3372*/
3373int64 floatx80_to_int64( floatx80 a )
3374{
3375    flag aSign;
3376    int32 aExp, shiftCount;
3377    bits64 aSig, aSigExtra;
3378
3379    aSig = extractFloatx80Frac( a );
3380    aExp = extractFloatx80Exp( a );
3381    aSign = extractFloatx80Sign( a );
3382    shiftCount = 0x403E - aExp;
3383    if ( shiftCount <= 0 ) {
3384        if ( shiftCount ) {
3385            float_raise( float_flag_invalid );
3386            if (    ! aSign
3387                 || (    ( aExp == 0x7FFF )
3388                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3389               ) {
3390                return LIT64( 0x7FFFFFFFFFFFFFFF );
3391            }
3392            return (sbits64) LIT64( 0x8000000000000000 );
3393        }
3394        aSigExtra = 0;
3395    }
3396    else {
3397        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3398    }
3399    return roundAndPackInt64( aSign, aSig, aSigExtra );
3400
3401}
3402
3403/*
3404-------------------------------------------------------------------------------
3405Returns the result of converting the extended double-precision floating-
3406point value `a' to the 64-bit two's complement integer format.  The
3407conversion is performed according to the IEC/IEEE Standard for Binary
3408Floating-Point Arithmetic, except that the conversion is always rounded
3409toward zero.  If `a' is a NaN, the largest positive integer is returned.
3410Otherwise, if the conversion overflows, the largest integer with the same
3411sign as `a' is returned.
3412-------------------------------------------------------------------------------
3413*/
3414int64 floatx80_to_int64_round_to_zero( floatx80 a )
3415{
3416    flag aSign;
3417    int32 aExp, shiftCount;
3418    bits64 aSig;
3419    int64 z;
3420
3421    aSig = extractFloatx80Frac( a );
3422    aExp = extractFloatx80Exp( a );
3423    aSign = extractFloatx80Sign( a );
3424    shiftCount = aExp - 0x403E;
3425    if ( 0 <= shiftCount ) {
3426        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3427        if ( ( a.high != 0xC03E ) || aSig ) {
3428            float_raise( float_flag_invalid );
3429            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3430                return LIT64( 0x7FFFFFFFFFFFFFFF );
3431            }
3432        }
3433        return (sbits64) LIT64( 0x8000000000000000 );
3434    }
3435    else if ( aExp < 0x3FFF ) {
3436        if ( aExp | aSig ) set_float_exception_inexact_flag();
3437        return 0;
3438    }
3439    z = aSig>>( - shiftCount );
3440    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3441        set_float_exception_inexact_flag();
3442    }
3443    if ( aSign ) z = - z;
3444    return z;
3445
3446}
3447
3448/*
3449-------------------------------------------------------------------------------
3450Returns the result of converting the extended double-precision floating-
3451point value `a' to the single-precision floating-point format.  The
3452conversion is performed according to the IEC/IEEE Standard for Binary
3453Floating-Point Arithmetic.
3454-------------------------------------------------------------------------------
3455*/
3456float32 floatx80_to_float32( floatx80 a )
3457{
3458    flag aSign;
3459    int32 aExp;
3460    bits64 aSig;
3461
3462    aSig = extractFloatx80Frac( a );
3463    aExp = extractFloatx80Exp( a );
3464    aSign = extractFloatx80Sign( a );
3465    if ( aExp == 0x7FFF ) {
3466        if ( (bits64) ( aSig<<1 ) ) {
3467            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3468        }
3469        return packFloat32( aSign, 0xFF, 0 );
3470    }
3471    shift64RightJamming( aSig, 33, &aSig );
3472    if ( aExp || aSig ) aExp -= 0x3F81;
3473    return roundAndPackFloat32( aSign, aExp, aSig );
3474
3475}
3476
3477/*
3478-------------------------------------------------------------------------------
3479Returns the result of converting the extended double-precision floating-
3480point value `a' to the double-precision floating-point format.  The
3481conversion is performed according to the IEC/IEEE Standard for Binary
3482Floating-Point Arithmetic.
3483-------------------------------------------------------------------------------
3484*/
3485float64 floatx80_to_float64( floatx80 a )
3486{
3487    flag aSign;
3488    int32 aExp;
3489    bits64 aSig, zSig;
3490
3491    aSig = extractFloatx80Frac( a );
3492    aExp = extractFloatx80Exp( a );
3493    aSign = extractFloatx80Sign( a );
3494    if ( aExp == 0x7FFF ) {
3495        if ( (bits64) ( aSig<<1 ) ) {
3496            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3497        }
3498        return packFloat64( aSign, 0x7FF, 0 );
3499    }
3500    shift64RightJamming( aSig, 1, &zSig );
3501    if ( aExp || aSig ) aExp -= 0x3C01;
3502    return roundAndPackFloat64( aSign, aExp, zSig );
3503
3504}
3505
3506#ifdef FLOAT128
3507
3508/*
3509-------------------------------------------------------------------------------
3510Returns the result of converting the extended double-precision floating-
3511point value `a' to the quadruple-precision floating-point format.  The
3512conversion is performed according to the IEC/IEEE Standard for Binary
3513Floating-Point Arithmetic.
3514-------------------------------------------------------------------------------
3515*/
3516float128 floatx80_to_float128( floatx80 a )
3517{
3518    flag aSign;
3519    int16 aExp;
3520    bits64 aSig, zSig0, zSig1;
3521
3522    aSig = extractFloatx80Frac( a );
3523    aExp = extractFloatx80Exp( a );
3524    aSign = extractFloatx80Sign( a );
3525    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3526        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3527    }
3528    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3529    return packFloat128( aSign, aExp, zSig0, zSig1 );
3530
3531}
3532
3533#endif
3534
3535/*
3536-------------------------------------------------------------------------------
3537Rounds the extended double-precision floating-point value `a' to an integer,
3538and returns the result as an extended quadruple-precision floating-point
3539value.  The operation is performed according to the IEC/IEEE Standard for
3540Binary Floating-Point Arithmetic.
3541-------------------------------------------------------------------------------
3542*/
3543floatx80 floatx80_round_to_int( floatx80 a )
3544{
3545    flag aSign;
3546    int32 aExp;
3547    bits64 lastBitMask, roundBitsMask;
3548    int8 roundingMode;
3549    floatx80 z;
3550
3551    aExp = extractFloatx80Exp( a );
3552    if ( 0x403E <= aExp ) {
3553        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3554            return propagateFloatx80NaN( a, a );
3555        }
3556        return a;
3557    }
3558    if ( aExp < 0x3FFF ) {
3559        if (    ( aExp == 0 )
3560             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3561            return a;
3562        }
3563        set_float_exception_inexact_flag();
3564        aSign = extractFloatx80Sign( a );
3565        switch ( float_rounding_mode ) {
3566         case float_round_nearest_even:
3567            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3568               ) {
3569                return
3570                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3571            }
3572            break;
3573	 case float_round_to_zero:
3574	    break;
3575         case float_round_down:
3576            return
3577                  aSign ?
3578                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3579                : packFloatx80( 0, 0, 0 );
3580         case float_round_up:
3581            return
3582                  aSign ? packFloatx80( 1, 0, 0 )
3583                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3584        }
3585        return packFloatx80( aSign, 0, 0 );
3586    }
3587    lastBitMask = 1;
3588    lastBitMask <<= 0x403E - aExp;
3589    roundBitsMask = lastBitMask - 1;
3590    z = a;
3591    roundingMode = float_rounding_mode;
3592    if ( roundingMode == float_round_nearest_even ) {
3593        z.low += lastBitMask>>1;
3594        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3595    }
3596    else if ( roundingMode != float_round_to_zero ) {
3597        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3598            z.low += roundBitsMask;
3599        }
3600    }
3601    z.low &= ~ roundBitsMask;
3602    if ( z.low == 0 ) {
3603        ++z.high;
3604        z.low = LIT64( 0x8000000000000000 );
3605    }
3606    if ( z.low != a.low ) set_float_exception_inexact_flag();
3607    return z;
3608
3609}
3610
3611/*
3612-------------------------------------------------------------------------------
3613Returns the result of adding the absolute values of the extended double-
3614precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3615negated before being returned.  `zSign' is ignored if the result is a NaN.
3616The addition is performed according to the IEC/IEEE Standard for Binary
3617Floating-Point Arithmetic.
3618-------------------------------------------------------------------------------
3619*/
3620static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3621{
3622    int32 aExp, bExp, zExp;
3623    bits64 aSig, bSig, zSig0, zSig1;
3624    int32 expDiff;
3625
3626    aSig = extractFloatx80Frac( a );
3627    aExp = extractFloatx80Exp( a );
3628    bSig = extractFloatx80Frac( b );
3629    bExp = extractFloatx80Exp( b );
3630    expDiff = aExp - bExp;
3631    if ( 0 < expDiff ) {
3632        if ( aExp == 0x7FFF ) {
3633            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3634            return a;
3635        }
3636        if ( bExp == 0 ) --expDiff;
3637        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3638        zExp = aExp;
3639    }
3640    else if ( expDiff < 0 ) {
3641        if ( bExp == 0x7FFF ) {
3642            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3643            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3644        }
3645        if ( aExp == 0 ) ++expDiff;
3646        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3647        zExp = bExp;
3648    }
3649    else {
3650        if ( aExp == 0x7FFF ) {
3651            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3652                return propagateFloatx80NaN( a, b );
3653            }
3654            return a;
3655        }
3656        zSig1 = 0;
3657        zSig0 = aSig + bSig;
3658        if ( aExp == 0 ) {
3659            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3660            goto roundAndPack;
3661        }
3662        zExp = aExp;
3663        goto shiftRight1;
3664    }
3665    zSig0 = aSig + bSig;
3666    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3667 shiftRight1:
3668    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3669    zSig0 |= LIT64( 0x8000000000000000 );
3670    ++zExp;
3671 roundAndPack:
3672    return
3673        roundAndPackFloatx80(
3674            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3675
3676}
3677
3678/*
3679-------------------------------------------------------------------------------
3680Returns the result of subtracting the absolute values of the extended
3681double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3682difference is negated before being returned.  `zSign' is ignored if the
3683result is a NaN.  The subtraction is performed according to the IEC/IEEE
3684Standard for Binary Floating-Point Arithmetic.
3685-------------------------------------------------------------------------------
3686*/
3687static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3688{
3689    int32 aExp, bExp, zExp;
3690    bits64 aSig, bSig, zSig0, zSig1;
3691    int32 expDiff;
3692    floatx80 z;
3693
3694    aSig = extractFloatx80Frac( a );
3695    aExp = extractFloatx80Exp( a );
3696    bSig = extractFloatx80Frac( b );
3697    bExp = extractFloatx80Exp( b );
3698    expDiff = aExp - bExp;
3699    if ( 0 < expDiff ) goto aExpBigger;
3700    if ( expDiff < 0 ) goto bExpBigger;
3701    if ( aExp == 0x7FFF ) {
3702        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3703            return propagateFloatx80NaN( a, b );
3704        }
3705        float_raise( float_flag_invalid );
3706        z.low = floatx80_default_nan_low;
3707        z.high = floatx80_default_nan_high;
3708        return z;
3709    }
3710    if ( aExp == 0 ) {
3711        aExp = 1;
3712        bExp = 1;
3713    }
3714    zSig1 = 0;
3715    if ( bSig < aSig ) goto aBigger;
3716    if ( aSig < bSig ) goto bBigger;
3717    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
3718 bExpBigger:
3719    if ( bExp == 0x7FFF ) {
3720        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3721        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3722    }
3723    if ( aExp == 0 ) ++expDiff;
3724    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3725 bBigger:
3726    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3727    zExp = bExp;
3728    zSign ^= 1;
3729    goto normalizeRoundAndPack;
3730 aExpBigger:
3731    if ( aExp == 0x7FFF ) {
3732        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3733        return a;
3734    }
3735    if ( bExp == 0 ) --expDiff;
3736    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3737 aBigger:
3738    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3739    zExp = aExp;
3740 normalizeRoundAndPack:
3741    return
3742        normalizeRoundAndPackFloatx80(
3743            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3744
3745}
3746
3747/*
3748-------------------------------------------------------------------------------
3749Returns the result of adding the extended double-precision floating-point
3750values `a' and `b'.  The operation is performed according to the IEC/IEEE
3751Standard for Binary Floating-Point Arithmetic.
3752-------------------------------------------------------------------------------
3753*/
3754floatx80 floatx80_add( floatx80 a, floatx80 b )
3755{
3756    flag aSign, bSign;
3757
3758    aSign = extractFloatx80Sign( a );
3759    bSign = extractFloatx80Sign( b );
3760    if ( aSign == bSign ) {
3761        return addFloatx80Sigs( a, b, aSign );
3762    }
3763    else {
3764        return subFloatx80Sigs( a, b, aSign );
3765    }
3766
3767}
3768
3769/*
3770-------------------------------------------------------------------------------
3771Returns the result of subtracting the extended double-precision floating-
3772point values `a' and `b'.  The operation is performed according to the
3773IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3774-------------------------------------------------------------------------------
3775*/
3776floatx80 floatx80_sub( floatx80 a, floatx80 b )
3777{
3778    flag aSign, bSign;
3779
3780    aSign = extractFloatx80Sign( a );
3781    bSign = extractFloatx80Sign( b );
3782    if ( aSign == bSign ) {
3783        return subFloatx80Sigs( a, b, aSign );
3784    }
3785    else {
3786        return addFloatx80Sigs( a, b, aSign );
3787    }
3788
3789}
3790
3791/*
3792-------------------------------------------------------------------------------
3793Returns the result of multiplying the extended double-precision floating-
3794point values `a' and `b'.  The operation is performed according to the
3795IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3796-------------------------------------------------------------------------------
3797*/
3798floatx80 floatx80_mul( floatx80 a, floatx80 b )
3799{
3800    flag aSign, bSign, zSign;
3801    int32 aExp, bExp, zExp;
3802    bits64 aSig, bSig, zSig0, zSig1;
3803    floatx80 z;
3804
3805    aSig = extractFloatx80Frac( a );
3806    aExp = extractFloatx80Exp( a );
3807    aSign = extractFloatx80Sign( a );
3808    bSig = extractFloatx80Frac( b );
3809    bExp = extractFloatx80Exp( b );
3810    bSign = extractFloatx80Sign( b );
3811    zSign = aSign ^ bSign;
3812    if ( aExp == 0x7FFF ) {
3813        if (    (bits64) ( aSig<<1 )
3814             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3815            return propagateFloatx80NaN( a, b );
3816        }
3817        if ( ( bExp | bSig ) == 0 ) goto invalid;
3818        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3819    }
3820    if ( bExp == 0x7FFF ) {
3821        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3822        if ( ( aExp | aSig ) == 0 ) {
3823 invalid:
3824            float_raise( float_flag_invalid );
3825            z.low = floatx80_default_nan_low;
3826            z.high = floatx80_default_nan_high;
3827            return z;
3828        }
3829        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3830    }
3831    if ( aExp == 0 ) {
3832        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3833        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3834    }
3835    if ( bExp == 0 ) {
3836        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3837        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3838    }
3839    zExp = aExp + bExp - 0x3FFE;
3840    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3841    if ( 0 < (sbits64) zSig0 ) {
3842        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3843        --zExp;
3844    }
3845    return
3846        roundAndPackFloatx80(
3847            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3848
3849}
3850
3851/*
3852-------------------------------------------------------------------------------
3853Returns the result of dividing the extended double-precision floating-point
3854value `a' by the corresponding value `b'.  The operation is performed
3855according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3856-------------------------------------------------------------------------------
3857*/
3858floatx80 floatx80_div( floatx80 a, floatx80 b )
3859{
3860    flag aSign, bSign, zSign;
3861    int32 aExp, bExp, zExp;
3862    bits64 aSig, bSig, zSig0, zSig1;
3863    bits64 rem0, rem1, rem2, term0, term1, term2;
3864    floatx80 z;
3865
3866    aSig = extractFloatx80Frac( a );
3867    aExp = extractFloatx80Exp( a );
3868    aSign = extractFloatx80Sign( a );
3869    bSig = extractFloatx80Frac( b );
3870    bExp = extractFloatx80Exp( b );
3871    bSign = extractFloatx80Sign( b );
3872    zSign = aSign ^ bSign;
3873    if ( aExp == 0x7FFF ) {
3874        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3875        if ( bExp == 0x7FFF ) {
3876            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3877            goto invalid;
3878        }
3879        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3880    }
3881    if ( bExp == 0x7FFF ) {
3882        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3883        return packFloatx80( zSign, 0, 0 );
3884    }
3885    if ( bExp == 0 ) {
3886        if ( bSig == 0 ) {
3887            if ( ( aExp | aSig ) == 0 ) {
3888 invalid:
3889                float_raise( float_flag_invalid );
3890                z.low = floatx80_default_nan_low;
3891                z.high = floatx80_default_nan_high;
3892                return z;
3893            }
3894            float_raise( float_flag_divbyzero );
3895            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3896        }
3897        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3898    }
3899    if ( aExp == 0 ) {
3900        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3901        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3902    }
3903    zExp = aExp - bExp + 0x3FFE;
3904    rem1 = 0;
3905    if ( bSig <= aSig ) {
3906        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3907        ++zExp;
3908    }
3909    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3910    mul64To128( bSig, zSig0, &term0, &term1 );
3911    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3912    while ( (sbits64) rem0 < 0 ) {
3913        --zSig0;
3914        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3915    }
3916    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3917    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3918        mul64To128( bSig, zSig1, &term1, &term2 );
3919        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3920        while ( (sbits64) rem1 < 0 ) {
3921            --zSig1;
3922            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3923        }
3924        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3925    }
3926    return
3927        roundAndPackFloatx80(
3928            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3929
3930}
3931
3932/*
3933-------------------------------------------------------------------------------
3934Returns the remainder of the extended double-precision floating-point value
3935`a' with respect to the corresponding value `b'.  The operation is performed
3936according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3937-------------------------------------------------------------------------------
3938*/
3939floatx80 floatx80_rem( floatx80 a, floatx80 b )
3940{
3941    flag aSign, zSign;
3942    int32 aExp, bExp, expDiff;
3943    bits64 aSig0, aSig1, bSig;
3944    bits64 q, term0, term1, alternateASig0, alternateASig1;
3945    floatx80 z;
3946
3947    aSig0 = extractFloatx80Frac( a );
3948    aExp = extractFloatx80Exp( a );
3949    aSign = extractFloatx80Sign( a );
3950    bSig = extractFloatx80Frac( b );
3951    bExp = extractFloatx80Exp( b );
3952
3953    if ( aExp == 0x7FFF ) {
3954        if (    (bits64) ( aSig0<<1 )
3955             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3956            return propagateFloatx80NaN( a, b );
3957        }
3958        goto invalid;
3959    }
3960    if ( bExp == 0x7FFF ) {
3961        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3962        return a;
3963    }
3964    if ( bExp == 0 ) {
3965        if ( bSig == 0 ) {
3966 invalid:
3967            float_raise( float_flag_invalid );
3968            z.low = floatx80_default_nan_low;
3969            z.high = floatx80_default_nan_high;
3970            return z;
3971        }
3972        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3973    }
3974    if ( aExp == 0 ) {
3975        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3976        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3977    }
3978    bSig |= LIT64( 0x8000000000000000 );
3979    zSign = aSign;
3980    expDiff = aExp - bExp;
3981    aSig1 = 0;
3982    if ( expDiff < 0 ) {
3983        if ( expDiff < -1 ) return a;
3984        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3985        expDiff = 0;
3986    }
3987    q = ( bSig <= aSig0 );
3988    if ( q ) aSig0 -= bSig;
3989    expDiff -= 64;
3990    while ( 0 < expDiff ) {
3991        q = estimateDiv128To64( aSig0, aSig1, bSig );
3992        q = ( 2 < q ) ? q - 2 : 0;
3993        mul64To128( bSig, q, &term0, &term1 );
3994        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3995        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3996        expDiff -= 62;
3997    }
3998    expDiff += 64;
3999    if ( 0 < expDiff ) {
4000        q = estimateDiv128To64( aSig0, aSig1, bSig );
4001        q = ( 2 < q ) ? q - 2 : 0;
4002        q >>= 64 - expDiff;
4003        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4004        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4005        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4006        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4007            ++q;
4008            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4009        }
4010    }
4011    else {
4012        term1 = 0;
4013        term0 = bSig;
4014    }
4015    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4016    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4017         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4018              && ( q & 1 ) )
4019       ) {
4020        aSig0 = alternateASig0;
4021        aSig1 = alternateASig1;
4022        zSign = ! zSign;
4023    }
4024    return
4025        normalizeRoundAndPackFloatx80(
4026            80, zSign, bExp + expDiff, aSig0, aSig1 );
4027
4028}
4029
4030/*
4031-------------------------------------------------------------------------------
4032Returns the square root of the extended double-precision floating-point
4033value `a'.  The operation is performed according to the IEC/IEEE Standard
4034for Binary Floating-Point Arithmetic.
4035-------------------------------------------------------------------------------
4036*/
4037floatx80 floatx80_sqrt( floatx80 a )
4038{
4039    flag aSign;
4040    int32 aExp, zExp;
4041    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4042    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4043    floatx80 z;
4044
4045    aSig0 = extractFloatx80Frac( a );
4046    aExp = extractFloatx80Exp( a );
4047    aSign = extractFloatx80Sign( a );
4048    if ( aExp == 0x7FFF ) {
4049        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4050        if ( ! aSign ) return a;
4051        goto invalid;
4052    }
4053    if ( aSign ) {
4054        if ( ( aExp | aSig0 ) == 0 ) return a;
4055 invalid:
4056        float_raise( float_flag_invalid );
4057        z.low = floatx80_default_nan_low;
4058        z.high = floatx80_default_nan_high;
4059        return z;
4060    }
4061    if ( aExp == 0 ) {
4062        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4063        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4064    }
4065    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4066    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4067    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4068    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4069    doubleZSig0 = zSig0<<1;
4070    mul64To128( zSig0, zSig0, &term0, &term1 );
4071    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4072    while ( (sbits64) rem0 < 0 ) {
4073        --zSig0;
4074        doubleZSig0 -= 2;
4075        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4076    }
4077    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4078    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4079        if ( zSig1 == 0 ) zSig1 = 1;
4080        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4081        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4082        mul64To128( zSig1, zSig1, &term2, &term3 );
4083        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4084        while ( (sbits64) rem1 < 0 ) {
4085            --zSig1;
4086            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4087            term3 |= 1;
4088            term2 |= doubleZSig0;
4089            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4090        }
4091        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4092    }
4093    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4094    zSig0 |= doubleZSig0;
4095    return
4096        roundAndPackFloatx80(
4097            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4098
4099}
4100
4101/*
4102-------------------------------------------------------------------------------
4103Returns 1 if the extended double-precision floating-point value `a' is
4104equal to the corresponding value `b', and 0 otherwise.  The comparison is
4105performed according to the IEC/IEEE Standard for Binary Floating-Point
4106Arithmetic.
4107-------------------------------------------------------------------------------
4108*/
4109flag floatx80_eq( floatx80 a, floatx80 b )
4110{
4111
4112    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4113              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4114         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4115              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4116       ) {
4117        if (    floatx80_is_signaling_nan( a )
4118             || floatx80_is_signaling_nan( b ) ) {
4119            float_raise( float_flag_invalid );
4120        }
4121        return 0;
4122    }
4123    return
4124           ( a.low == b.low )
4125        && (    ( a.high == b.high )
4126             || (    ( a.low == 0 )
4127                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4128           );
4129
4130}
4131
4132/*
4133-------------------------------------------------------------------------------
4134Returns 1 if the extended double-precision floating-point value `a' is
4135less than or equal to the corresponding value `b', and 0 otherwise.  The
4136comparison is performed according to the IEC/IEEE Standard for Binary
4137Floating-Point Arithmetic.
4138-------------------------------------------------------------------------------
4139*/
4140flag floatx80_le( floatx80 a, floatx80 b )
4141{
4142    flag aSign, bSign;
4143
4144    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4145              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4146         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4147              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4148       ) {
4149        float_raise( float_flag_invalid );
4150        return 0;
4151    }
4152    aSign = extractFloatx80Sign( a );
4153    bSign = extractFloatx80Sign( b );
4154    if ( aSign != bSign ) {
4155        return
4156               aSign
4157            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4158                 == 0 );
4159    }
4160    return
4161          aSign ? le128( b.high, b.low, a.high, a.low )
4162        : le128( a.high, a.low, b.high, b.low );
4163
4164}
4165
4166/*
4167-------------------------------------------------------------------------------
4168Returns 1 if the extended double-precision floating-point value `a' is
4169less than the corresponding value `b', and 0 otherwise.  The comparison
4170is performed according to the IEC/IEEE Standard for Binary Floating-Point
4171Arithmetic.
4172-------------------------------------------------------------------------------
4173*/
4174flag floatx80_lt( floatx80 a, floatx80 b )
4175{
4176    flag aSign, bSign;
4177
4178    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4179              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4180         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4181              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4182       ) {
4183        float_raise( float_flag_invalid );
4184        return 0;
4185    }
4186    aSign = extractFloatx80Sign( a );
4187    bSign = extractFloatx80Sign( b );
4188    if ( aSign != bSign ) {
4189        return
4190               aSign
4191            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4192                 != 0 );
4193    }
4194    return
4195          aSign ? lt128( b.high, b.low, a.high, a.low )
4196        : lt128( a.high, a.low, b.high, b.low );
4197
4198}
4199
4200/*
4201-------------------------------------------------------------------------------
4202Returns 1 if the extended double-precision floating-point value `a' is equal
4203to the corresponding value `b', and 0 otherwise.  The invalid exception is
4204raised if either operand is a NaN.  Otherwise, the comparison is performed
4205according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4206-------------------------------------------------------------------------------
4207*/
4208flag floatx80_eq_signaling( floatx80 a, floatx80 b )
4209{
4210
4211    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4212              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4213         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4214              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4215       ) {
4216        float_raise( float_flag_invalid );
4217        return 0;
4218    }
4219    return
4220           ( a.low == b.low )
4221        && (    ( a.high == b.high )
4222             || (    ( a.low == 0 )
4223                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4224           );
4225
4226}
4227
4228/*
4229-------------------------------------------------------------------------------
4230Returns 1 if the extended double-precision floating-point value `a' is less
4231than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4232do not cause an exception.  Otherwise, the comparison is performed according
4233to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4234-------------------------------------------------------------------------------
4235*/
4236flag floatx80_le_quiet( floatx80 a, floatx80 b )
4237{
4238    flag aSign, bSign;
4239
4240    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4241              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4242         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4243              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4244       ) {
4245        if (    floatx80_is_signaling_nan( a )
4246             || floatx80_is_signaling_nan( b ) ) {
4247            float_raise( float_flag_invalid );
4248        }
4249        return 0;
4250    }
4251    aSign = extractFloatx80Sign( a );
4252    bSign = extractFloatx80Sign( b );
4253    if ( aSign != bSign ) {
4254        return
4255               aSign
4256            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4257                 == 0 );
4258    }
4259    return
4260          aSign ? le128( b.high, b.low, a.high, a.low )
4261        : le128( a.high, a.low, b.high, b.low );
4262
4263}
4264
4265/*
4266-------------------------------------------------------------------------------
4267Returns 1 if the extended double-precision floating-point value `a' is less
4268than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4269an exception.  Otherwise, the comparison is performed according to the
4270IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4271-------------------------------------------------------------------------------
4272*/
4273flag floatx80_lt_quiet( floatx80 a, floatx80 b )
4274{
4275    flag aSign, bSign;
4276
4277    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4278              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4279         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4280              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4281       ) {
4282        if (    floatx80_is_signaling_nan( a )
4283             || floatx80_is_signaling_nan( b ) ) {
4284            float_raise( float_flag_invalid );
4285        }
4286        return 0;
4287    }
4288    aSign = extractFloatx80Sign( a );
4289    bSign = extractFloatx80Sign( b );
4290    if ( aSign != bSign ) {
4291        return
4292               aSign
4293            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4294                 != 0 );
4295    }
4296    return
4297          aSign ? lt128( b.high, b.low, a.high, a.low )
4298        : lt128( a.high, a.low, b.high, b.low );
4299
4300}
4301
4302#endif
4303
4304#ifdef FLOAT128
4305
4306/*
4307-------------------------------------------------------------------------------
4308Returns the result of converting the quadruple-precision floating-point
4309value `a' to the 32-bit two's complement integer format.  The conversion
4310is performed according to the IEC/IEEE Standard for Binary Floating-Point
4311Arithmetic---which means in particular that the conversion is rounded
4312according to the current rounding mode.  If `a' is a NaN, the largest
4313positive integer is returned.  Otherwise, if the conversion overflows, the
4314largest integer with the same sign as `a' is returned.
4315-------------------------------------------------------------------------------
4316*/
4317int32 float128_to_int32( float128 a )
4318{
4319    flag aSign;
4320    int32 aExp, shiftCount;
4321    bits64 aSig0, aSig1;
4322
4323    aSig1 = extractFloat128Frac1( a );
4324    aSig0 = extractFloat128Frac0( a );
4325    aExp = extractFloat128Exp( a );
4326    aSign = extractFloat128Sign( a );
4327    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4328    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4329    aSig0 |= ( aSig1 != 0 );
4330    shiftCount = 0x4028 - aExp;
4331    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4332    return roundAndPackInt32( aSign, aSig0 );
4333
4334}
4335
4336/*
4337-------------------------------------------------------------------------------
4338Returns the result of converting the quadruple-precision floating-point
4339value `a' to the 32-bit two's complement integer format.  The conversion
4340is performed according to the IEC/IEEE Standard for Binary Floating-Point
4341Arithmetic, except that the conversion is always rounded toward zero.  If
4342`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4343conversion overflows, the largest integer with the same sign as `a' is
4344returned.
4345-------------------------------------------------------------------------------
4346*/
4347int32 float128_to_int32_round_to_zero( float128 a )
4348{
4349    flag aSign;
4350    int32 aExp, shiftCount;
4351    bits64 aSig0, aSig1, savedASig;
4352    int32 z;
4353
4354    aSig1 = extractFloat128Frac1( a );
4355    aSig0 = extractFloat128Frac0( a );
4356    aExp = extractFloat128Exp( a );
4357    aSign = extractFloat128Sign( a );
4358    aSig0 |= ( aSig1 != 0 );
4359    if ( 0x401E < aExp ) {
4360        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4361        goto invalid;
4362    }
4363    else if ( aExp < 0x3FFF ) {
4364        if ( aExp || aSig0 ) set_float_exception_inexact_flag();
4365        return 0;
4366    }
4367    aSig0 |= LIT64( 0x0001000000000000 );
4368    shiftCount = 0x402F - aExp;
4369    savedASig = aSig0;
4370    aSig0 >>= shiftCount;
4371    z = (int32)aSig0;
4372    if ( aSign ) z = - z;
4373    if ( ( z < 0 ) ^ aSign ) {
4374 invalid:
4375        float_raise( float_flag_invalid );
4376        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4377    }
4378    if ( ( aSig0<<shiftCount ) != savedASig ) {
4379        set_float_exception_inexact_flag();
4380    }
4381    return z;
4382
4383}
4384
4385/*
4386-------------------------------------------------------------------------------
4387Returns the result of converting the quadruple-precision floating-point
4388value `a' to the 64-bit two's complement integer format.  The conversion
4389is performed according to the IEC/IEEE Standard for Binary Floating-Point
4390Arithmetic---which means in particular that the conversion is rounded
4391according to the current rounding mode.  If `a' is a NaN, the largest
4392positive integer is returned.  Otherwise, if the conversion overflows, the
4393largest integer with the same sign as `a' is returned.
4394-------------------------------------------------------------------------------
4395*/
4396int64 float128_to_int64( float128 a )
4397{
4398    flag aSign;
4399    int32 aExp, shiftCount;
4400    bits64 aSig0, aSig1;
4401
4402    aSig1 = extractFloat128Frac1( a );
4403    aSig0 = extractFloat128Frac0( a );
4404    aExp = extractFloat128Exp( a );
4405    aSign = extractFloat128Sign( a );
4406    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4407    shiftCount = 0x402F - aExp;
4408    if ( shiftCount <= 0 ) {
4409        if ( 0x403E < aExp ) {
4410            float_raise( float_flag_invalid );
4411            if (    ! aSign
4412                 || (    ( aExp == 0x7FFF )
4413                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4414                    )
4415               ) {
4416                return LIT64( 0x7FFFFFFFFFFFFFFF );
4417            }
4418            return (sbits64) LIT64( 0x8000000000000000 );
4419        }
4420        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4421    }
4422    else {
4423        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4424    }
4425    return roundAndPackInt64( aSign, aSig0, aSig1 );
4426
4427}
4428
4429/*
4430-------------------------------------------------------------------------------
4431Returns the result of converting the quadruple-precision floating-point
4432value `a' to the 64-bit two's complement integer format.  The conversion
4433is performed according to the IEC/IEEE Standard for Binary Floating-Point
4434Arithmetic, except that the conversion is always rounded toward zero.
4435If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4436the conversion overflows, the largest integer with the same sign as `a' is
4437returned.
4438-------------------------------------------------------------------------------
4439*/
4440int64 float128_to_int64_round_to_zero( float128 a )
4441{
4442    flag aSign;
4443    int32 aExp, shiftCount;
4444    bits64 aSig0, aSig1;
4445    int64 z;
4446
4447    aSig1 = extractFloat128Frac1( a );
4448    aSig0 = extractFloat128Frac0( a );
4449    aExp = extractFloat128Exp( a );
4450    aSign = extractFloat128Sign( a );
4451    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4452    shiftCount = aExp - 0x402F;
4453    if ( 0 < shiftCount ) {
4454        if ( 0x403E <= aExp ) {
4455            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4456            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4457                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4458                if ( aSig1 ) set_float_exception_inexact_flag();
4459            }
4460            else {
4461                float_raise( float_flag_invalid );
4462                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4463                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4464                }
4465            }
4466            return (sbits64) LIT64( 0x8000000000000000 );
4467        }
4468        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4469        if ( (bits64) ( aSig1<<shiftCount ) ) {
4470            set_float_exception_inexact_flag();
4471        }
4472    }
4473    else {
4474        if ( aExp < 0x3FFF ) {
4475            if ( aExp | aSig0 | aSig1 ) {
4476                set_float_exception_inexact_flag();
4477            }
4478            return 0;
4479        }
4480        z = aSig0>>( - shiftCount );
4481        if (    aSig1
4482             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4483            set_float_exception_inexact_flag();
4484        }
4485    }
4486    if ( aSign ) z = - z;
4487    return z;
4488
4489}
4490
4491#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
4492    && defined(SOFTFLOAT_NEED_FIXUNS)
4493/*
4494 * just like above - but do not care for overflow of signed results
4495 */
4496uint64 float128_to_uint64_round_to_zero( float128 a )
4497{
4498    flag aSign;
4499    int32 aExp, shiftCount;
4500    bits64 aSig0, aSig1;
4501    uint64 z;
4502
4503    aSig1 = extractFloat128Frac1( a );
4504    aSig0 = extractFloat128Frac0( a );
4505    aExp = extractFloat128Exp( a );
4506    aSign = extractFloat128Sign( a );
4507    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4508    shiftCount = aExp - 0x402F;
4509    if ( 0 < shiftCount ) {
4510        if ( 0x403F <= aExp ) {
4511            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4512            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4513                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4514                if ( aSig1 ) set_float_exception_inexact_flag();
4515            }
4516            else {
4517                float_raise( float_flag_invalid );
4518            }
4519            return LIT64( 0xFFFFFFFFFFFFFFFF );
4520        }
4521        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4522        if ( (bits64) ( aSig1<<shiftCount ) ) {
4523            set_float_exception_inexact_flag();
4524        }
4525    }
4526    else {
4527        if ( aExp < 0x3FFF ) {
4528            if ( aExp | aSig0 | aSig1 ) {
4529                set_float_exception_inexact_flag();
4530            }
4531            return 0;
4532        }
4533        z = aSig0>>( - shiftCount );
4534        if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4535            set_float_exception_inexact_flag();
4536        }
4537    }
4538    if ( aSign ) z = - z;
4539    return z;
4540
4541}
4542#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
4543
4544/*
4545-------------------------------------------------------------------------------
4546Returns the result of converting the quadruple-precision floating-point
4547value `a' to the single-precision floating-point format.  The conversion
4548is performed according to the IEC/IEEE Standard for Binary Floating-Point
4549Arithmetic.
4550-------------------------------------------------------------------------------
4551*/
4552float32 float128_to_float32( float128 a )
4553{
4554    flag aSign;
4555    int32 aExp;
4556    bits64 aSig0, aSig1;
4557    bits32 zSig;
4558
4559    aSig1 = extractFloat128Frac1( a );
4560    aSig0 = extractFloat128Frac0( a );
4561    aExp = extractFloat128Exp( a );
4562    aSign = extractFloat128Sign( a );
4563    if ( aExp == 0x7FFF ) {
4564        if ( aSig0 | aSig1 ) {
4565            return commonNaNToFloat32( float128ToCommonNaN( a ) );
4566        }
4567        return packFloat32( aSign, 0xFF, 0 );
4568    }
4569    aSig0 |= ( aSig1 != 0 );
4570    shift64RightJamming( aSig0, 18, &aSig0 );
4571    zSig = (bits32)aSig0;
4572    if ( aExp || zSig ) {
4573        zSig |= 0x40000000;
4574        aExp -= 0x3F81;
4575    }
4576    return roundAndPackFloat32( aSign, aExp, zSig );
4577
4578}
4579
4580/*
4581-------------------------------------------------------------------------------
4582Returns the result of converting the quadruple-precision floating-point
4583value `a' to the double-precision floating-point format.  The conversion
4584is performed according to the IEC/IEEE Standard for Binary Floating-Point
4585Arithmetic.
4586-------------------------------------------------------------------------------
4587*/
4588float64 float128_to_float64( float128 a )
4589{
4590    flag aSign;
4591    int32 aExp;
4592    bits64 aSig0, aSig1;
4593
4594    aSig1 = extractFloat128Frac1( a );
4595    aSig0 = extractFloat128Frac0( a );
4596    aExp = extractFloat128Exp( a );
4597    aSign = extractFloat128Sign( a );
4598    if ( aExp == 0x7FFF ) {
4599        if ( aSig0 | aSig1 ) {
4600            return commonNaNToFloat64( float128ToCommonNaN( a ) );
4601        }
4602        return packFloat64( aSign, 0x7FF, 0 );
4603    }
4604    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4605    aSig0 |= ( aSig1 != 0 );
4606    if ( aExp || aSig0 ) {
4607        aSig0 |= LIT64( 0x4000000000000000 );
4608        aExp -= 0x3C01;
4609    }
4610    return roundAndPackFloat64( aSign, aExp, aSig0 );
4611
4612}
4613
4614#ifdef FLOATX80
4615
4616/*
4617-------------------------------------------------------------------------------
4618Returns the result of converting the quadruple-precision floating-point
4619value `a' to the extended double-precision floating-point format.  The
4620conversion is performed according to the IEC/IEEE Standard for Binary
4621Floating-Point Arithmetic.
4622-------------------------------------------------------------------------------
4623*/
4624floatx80 float128_to_floatx80( float128 a )
4625{
4626    flag aSign;
4627    int32 aExp;
4628    bits64 aSig0, aSig1;
4629
4630    aSig1 = extractFloat128Frac1( a );
4631    aSig0 = extractFloat128Frac0( a );
4632    aExp = extractFloat128Exp( a );
4633    aSign = extractFloat128Sign( a );
4634    if ( aExp == 0x7FFF ) {
4635        if ( aSig0 | aSig1 ) {
4636            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4637        }
4638        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4639    }
4640    if ( aExp == 0 ) {
4641        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4642        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4643    }
4644    else {
4645        aSig0 |= LIT64( 0x0001000000000000 );
4646    }
4647    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4648    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4649
4650}
4651
4652#endif
4653
4654/*
4655-------------------------------------------------------------------------------
4656Rounds the quadruple-precision floating-point value `a' to an integer, and
4657returns the result as a quadruple-precision floating-point value.  The
4658operation is performed according to the IEC/IEEE Standard for Binary
4659Floating-Point Arithmetic.
4660-------------------------------------------------------------------------------
4661*/
4662float128 float128_round_to_int( float128 a )
4663{
4664    flag aSign;
4665    int32 aExp;
4666    bits64 lastBitMask, roundBitsMask;
4667    int8 roundingMode;
4668    float128 z;
4669
4670    aExp = extractFloat128Exp( a );
4671    if ( 0x402F <= aExp ) {
4672        if ( 0x406F <= aExp ) {
4673            if (    ( aExp == 0x7FFF )
4674                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4675               ) {
4676                return propagateFloat128NaN( a, a );
4677            }
4678            return a;
4679        }
4680        lastBitMask = 1;
4681        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4682        roundBitsMask = lastBitMask - 1;
4683        z = a;
4684        roundingMode = float_rounding_mode;
4685        if ( roundingMode == float_round_nearest_even ) {
4686            if ( lastBitMask ) {
4687                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4688                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4689            }
4690            else {
4691                if ( (sbits64) z.low < 0 ) {
4692                    ++z.high;
4693                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4694                }
4695            }
4696        }
4697        else if ( roundingMode != float_round_to_zero ) {
4698            if (   extractFloat128Sign( z )
4699                 ^ ( roundingMode == float_round_up ) ) {
4700                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4701            }
4702        }
4703        z.low &= ~ roundBitsMask;
4704    }
4705    else {
4706        if ( aExp < 0x3FFF ) {
4707            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4708            set_float_exception_inexact_flag();
4709            aSign = extractFloat128Sign( a );
4710            switch ( float_rounding_mode ) {
4711             case float_round_nearest_even:
4712                if (    ( aExp == 0x3FFE )
4713                     && (   extractFloat128Frac0( a )
4714                          | extractFloat128Frac1( a ) )
4715                   ) {
4716                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4717                }
4718                break;
4719	     case float_round_to_zero:
4720		break;
4721             case float_round_down:
4722                return
4723                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4724                    : packFloat128( 0, 0, 0, 0 );
4725             case float_round_up:
4726                return
4727                      aSign ? packFloat128( 1, 0, 0, 0 )
4728                    : packFloat128( 0, 0x3FFF, 0, 0 );
4729            }
4730            return packFloat128( aSign, 0, 0, 0 );
4731        }
4732        lastBitMask = 1;
4733        lastBitMask <<= 0x402F - aExp;
4734        roundBitsMask = lastBitMask - 1;
4735        z.low = 0;
4736        z.high = a.high;
4737        roundingMode = float_rounding_mode;
4738        if ( roundingMode == float_round_nearest_even ) {
4739            z.high += lastBitMask>>1;
4740            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4741                z.high &= ~ lastBitMask;
4742            }
4743        }
4744        else if ( roundingMode != float_round_to_zero ) {
4745            if (   extractFloat128Sign( z )
4746                 ^ ( roundingMode == float_round_up ) ) {
4747                z.high |= ( a.low != 0 );
4748                z.high += roundBitsMask;
4749            }
4750        }
4751        z.high &= ~ roundBitsMask;
4752    }
4753    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4754        set_float_exception_inexact_flag();
4755    }
4756    return z;
4757
4758}
4759
4760/*
4761-------------------------------------------------------------------------------
4762Returns the result of adding the absolute values of the quadruple-precision
4763floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4764before being returned.  `zSign' is ignored if the result is a NaN.
4765The addition is performed according to the IEC/IEEE Standard for Binary
4766Floating-Point Arithmetic.
4767-------------------------------------------------------------------------------
4768*/
4769static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4770{
4771    int32 aExp, bExp, zExp;
4772    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4773    int32 expDiff;
4774
4775    aSig1 = extractFloat128Frac1( a );
4776    aSig0 = extractFloat128Frac0( a );
4777    aExp = extractFloat128Exp( a );
4778    bSig1 = extractFloat128Frac1( b );
4779    bSig0 = extractFloat128Frac0( b );
4780    bExp = extractFloat128Exp( b );
4781    expDiff = aExp - bExp;
4782    if ( 0 < expDiff ) {
4783        if ( aExp == 0x7FFF ) {
4784            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4785            return a;
4786        }
4787        if ( bExp == 0 ) {
4788            --expDiff;
4789        }
4790        else {
4791            bSig0 |= LIT64( 0x0001000000000000 );
4792        }
4793        shift128ExtraRightJamming(
4794            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4795        zExp = aExp;
4796    }
4797    else if ( expDiff < 0 ) {
4798        if ( bExp == 0x7FFF ) {
4799            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4800            return packFloat128( zSign, 0x7FFF, 0, 0 );
4801        }
4802        if ( aExp == 0 ) {
4803            ++expDiff;
4804        }
4805        else {
4806            aSig0 |= LIT64( 0x0001000000000000 );
4807        }
4808        shift128ExtraRightJamming(
4809            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4810        zExp = bExp;
4811    }
4812    else {
4813        if ( aExp == 0x7FFF ) {
4814            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4815                return propagateFloat128NaN( a, b );
4816            }
4817            return a;
4818        }
4819        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4820        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4821        zSig2 = 0;
4822        zSig0 |= LIT64( 0x0002000000000000 );
4823        zExp = aExp;
4824        goto shiftRight1;
4825    }
4826    aSig0 |= LIT64( 0x0001000000000000 );
4827    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4828    --zExp;
4829    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4830    ++zExp;
4831 shiftRight1:
4832    shift128ExtraRightJamming(
4833        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4834 roundAndPack:
4835    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4836
4837}
4838
4839/*
4840-------------------------------------------------------------------------------
4841Returns the result of subtracting the absolute values of the quadruple-
4842precision floating-point values `a' and `b'.  If `zSign' is 1, the
4843difference is negated before being returned.  `zSign' is ignored if the
4844result is a NaN.  The subtraction is performed according to the IEC/IEEE
4845Standard for Binary Floating-Point Arithmetic.
4846-------------------------------------------------------------------------------
4847*/
4848static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4849{
4850    int32 aExp, bExp, zExp;
4851    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4852    int32 expDiff;
4853    float128 z;
4854
4855    aSig1 = extractFloat128Frac1( a );
4856    aSig0 = extractFloat128Frac0( a );
4857    aExp = extractFloat128Exp( a );
4858    bSig1 = extractFloat128Frac1( b );
4859    bSig0 = extractFloat128Frac0( b );
4860    bExp = extractFloat128Exp( b );
4861    expDiff = aExp - bExp;
4862    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4863    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4864    if ( 0 < expDiff ) goto aExpBigger;
4865    if ( expDiff < 0 ) goto bExpBigger;
4866    if ( aExp == 0x7FFF ) {
4867        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4868            return propagateFloat128NaN( a, b );
4869        }
4870        float_raise( float_flag_invalid );
4871        z.low = float128_default_nan_low;
4872        z.high = float128_default_nan_high;
4873        return z;
4874    }
4875    if ( aExp == 0 ) {
4876        aExp = 1;
4877        bExp = 1;
4878    }
4879    if ( bSig0 < aSig0 ) goto aBigger;
4880    if ( aSig0 < bSig0 ) goto bBigger;
4881    if ( bSig1 < aSig1 ) goto aBigger;
4882    if ( aSig1 < bSig1 ) goto bBigger;
4883    return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
4884 bExpBigger:
4885    if ( bExp == 0x7FFF ) {
4886        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4887        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4888    }
4889    if ( aExp == 0 ) {
4890        ++expDiff;
4891    }
4892    else {
4893        aSig0 |= LIT64( 0x4000000000000000 );
4894    }
4895    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4896    bSig0 |= LIT64( 0x4000000000000000 );
4897 bBigger:
4898    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4899    zExp = bExp;
4900    zSign ^= 1;
4901    goto normalizeRoundAndPack;
4902 aExpBigger:
4903    if ( aExp == 0x7FFF ) {
4904        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4905        return a;
4906    }
4907    if ( bExp == 0 ) {
4908        --expDiff;
4909    }
4910    else {
4911        bSig0 |= LIT64( 0x4000000000000000 );
4912    }
4913    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4914    aSig0 |= LIT64( 0x4000000000000000 );
4915 aBigger:
4916    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4917    zExp = aExp;
4918 normalizeRoundAndPack:
4919    --zExp;
4920    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4921
4922}
4923
4924/*
4925-------------------------------------------------------------------------------
4926Returns the result of adding the quadruple-precision floating-point values
4927`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4928for Binary Floating-Point Arithmetic.
4929-------------------------------------------------------------------------------
4930*/
4931float128 float128_add( float128 a, float128 b )
4932{
4933    flag aSign, bSign;
4934
4935    aSign = extractFloat128Sign( a );
4936    bSign = extractFloat128Sign( b );
4937    if ( aSign == bSign ) {
4938        return addFloat128Sigs( a, b, aSign );
4939    }
4940    else {
4941        return subFloat128Sigs( a, b, aSign );
4942    }
4943
4944}
4945
4946/*
4947-------------------------------------------------------------------------------
4948Returns the result of subtracting the quadruple-precision floating-point
4949values `a' and `b'.  The operation is performed according to the IEC/IEEE
4950Standard for Binary Floating-Point Arithmetic.
4951-------------------------------------------------------------------------------
4952*/
4953float128 float128_sub( float128 a, float128 b )
4954{
4955    flag aSign, bSign;
4956
4957    aSign = extractFloat128Sign( a );
4958    bSign = extractFloat128Sign( b );
4959    if ( aSign == bSign ) {
4960        return subFloat128Sigs( a, b, aSign );
4961    }
4962    else {
4963        return addFloat128Sigs( a, b, aSign );
4964    }
4965
4966}
4967
4968/*
4969-------------------------------------------------------------------------------
4970Returns the result of multiplying the quadruple-precision floating-point
4971values `a' and `b'.  The operation is performed according to the IEC/IEEE
4972Standard for Binary Floating-Point Arithmetic.
4973-------------------------------------------------------------------------------
4974*/
4975float128 float128_mul( float128 a, float128 b )
4976{
4977    flag aSign, bSign, zSign;
4978    int32 aExp, bExp, zExp;
4979    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4980    float128 z;
4981
4982    aSig1 = extractFloat128Frac1( a );
4983    aSig0 = extractFloat128Frac0( a );
4984    aExp = extractFloat128Exp( a );
4985    aSign = extractFloat128Sign( a );
4986    bSig1 = extractFloat128Frac1( b );
4987    bSig0 = extractFloat128Frac0( b );
4988    bExp = extractFloat128Exp( b );
4989    bSign = extractFloat128Sign( b );
4990    zSign = aSign ^ bSign;
4991    if ( aExp == 0x7FFF ) {
4992        if (    ( aSig0 | aSig1 )
4993             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4994            return propagateFloat128NaN( a, b );
4995        }
4996        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4997        return packFloat128( zSign, 0x7FFF, 0, 0 );
4998    }
4999    if ( bExp == 0x7FFF ) {
5000        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5001        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5002 invalid:
5003            float_raise( float_flag_invalid );
5004            z.low = float128_default_nan_low;
5005            z.high = float128_default_nan_high;
5006            return z;
5007        }
5008        return packFloat128( zSign, 0x7FFF, 0, 0 );
5009    }
5010    if ( aExp == 0 ) {
5011        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5012        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5013    }
5014    if ( bExp == 0 ) {
5015        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5016        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5017    }
5018    zExp = aExp + bExp - 0x4000;
5019    aSig0 |= LIT64( 0x0001000000000000 );
5020    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5021    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5022    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5023    zSig2 |= ( zSig3 != 0 );
5024    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5025        shift128ExtraRightJamming(
5026            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5027        ++zExp;
5028    }
5029    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5030
5031}
5032
5033/*
5034-------------------------------------------------------------------------------
5035Returns the result of dividing the quadruple-precision floating-point value
5036`a' by the corresponding value `b'.  The operation is performed according to
5037the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5038-------------------------------------------------------------------------------
5039*/
5040float128 float128_div( float128 a, float128 b )
5041{
5042    flag aSign, bSign, zSign;
5043    int32 aExp, bExp, zExp;
5044    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5045    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5046    float128 z;
5047
5048    aSig1 = extractFloat128Frac1( a );
5049    aSig0 = extractFloat128Frac0( a );
5050    aExp = extractFloat128Exp( a );
5051    aSign = extractFloat128Sign( a );
5052    bSig1 = extractFloat128Frac1( b );
5053    bSig0 = extractFloat128Frac0( b );
5054    bExp = extractFloat128Exp( b );
5055    bSign = extractFloat128Sign( b );
5056    zSign = aSign ^ bSign;
5057    if ( aExp == 0x7FFF ) {
5058        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
5059        if ( bExp == 0x7FFF ) {
5060            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5061            goto invalid;
5062        }
5063        return packFloat128( zSign, 0x7FFF, 0, 0 );
5064    }
5065    if ( bExp == 0x7FFF ) {
5066        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5067        return packFloat128( zSign, 0, 0, 0 );
5068    }
5069    if ( bExp == 0 ) {
5070        if ( ( bSig0 | bSig1 ) == 0 ) {
5071            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5072 invalid:
5073                float_raise( float_flag_invalid );
5074                z.low = float128_default_nan_low;
5075                z.high = float128_default_nan_high;
5076                return z;
5077            }
5078            float_raise( float_flag_divbyzero );
5079            return packFloat128( zSign, 0x7FFF, 0, 0 );
5080        }
5081        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5082    }
5083    if ( aExp == 0 ) {
5084        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5085        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5086    }
5087    zExp = aExp - bExp + 0x3FFD;
5088    shortShift128Left(
5089        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5090    shortShift128Left(
5091        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5092    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5093        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5094        ++zExp;
5095    }
5096    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5097    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5098    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5099    while ( (sbits64) rem0 < 0 ) {
5100        --zSig0;
5101        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5102    }
5103    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5104    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5105        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5106        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5107        while ( (sbits64) rem1 < 0 ) {
5108            --zSig1;
5109            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5110        }
5111        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5112    }
5113    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5114    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5115
5116}
5117
5118/*
5119-------------------------------------------------------------------------------
5120Returns the remainder of the quadruple-precision floating-point value `a'
5121with respect to the corresponding value `b'.  The operation is performed
5122according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5123-------------------------------------------------------------------------------
5124*/
5125float128 float128_rem( float128 a, float128 b )
5126{
5127    flag aSign, zSign;
5128    int32 aExp, bExp, expDiff;
5129    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5130    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5131    sbits64 sigMean0;
5132    float128 z;
5133
5134    aSig1 = extractFloat128Frac1( a );
5135    aSig0 = extractFloat128Frac0( a );
5136    aExp = extractFloat128Exp( a );
5137    aSign = extractFloat128Sign( a );
5138    bSig1 = extractFloat128Frac1( b );
5139    bSig0 = extractFloat128Frac0( b );
5140    bExp = extractFloat128Exp( b );
5141    if ( aExp == 0x7FFF ) {
5142        if (    ( aSig0 | aSig1 )
5143             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5144            return propagateFloat128NaN( a, b );
5145        }
5146        goto invalid;
5147    }
5148    if ( bExp == 0x7FFF ) {
5149        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5150        return a;
5151    }
5152    if ( bExp == 0 ) {
5153        if ( ( bSig0 | bSig1 ) == 0 ) {
5154 invalid:
5155            float_raise( float_flag_invalid );
5156            z.low = float128_default_nan_low;
5157            z.high = float128_default_nan_high;
5158            return z;
5159        }
5160        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5161    }
5162    if ( aExp == 0 ) {
5163        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5164        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5165    }
5166    expDiff = aExp - bExp;
5167    if ( expDiff < -1 ) return a;
5168    shortShift128Left(
5169        aSig0 | LIT64( 0x0001000000000000 ),
5170        aSig1,
5171        15 - ( expDiff < 0 ),
5172        &aSig0,
5173        &aSig1
5174    );
5175    shortShift128Left(
5176        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5177    q = le128( bSig0, bSig1, aSig0, aSig1 );
5178    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5179    expDiff -= 64;
5180    while ( 0 < expDiff ) {
5181        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5182        q = ( 4 < q ) ? q - 4 : 0;
5183        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5184        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5185        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5186        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5187        expDiff -= 61;
5188    }
5189    if ( -64 < expDiff ) {
5190        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5191        q = ( 4 < q ) ? q - 4 : 0;
5192        q >>= - expDiff;
5193        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5194        expDiff += 52;
5195        if ( expDiff < 0 ) {
5196            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5197        }
5198        else {
5199            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5200        }
5201        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5202        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5203    }
5204    else {
5205        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5206        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5207    }
5208    do {
5209        alternateASig0 = aSig0;
5210        alternateASig1 = aSig1;
5211        ++q;
5212        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5213    } while ( 0 <= (sbits64) aSig0 );
5214    add128(
5215        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
5216    if (    ( sigMean0 < 0 )
5217         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5218        aSig0 = alternateASig0;
5219        aSig1 = alternateASig1;
5220    }
5221    zSign = ( (sbits64) aSig0 < 0 );
5222    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5223    return
5224        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
5225
5226}
5227
5228/*
5229-------------------------------------------------------------------------------
5230Returns the square root of the quadruple-precision floating-point value `a'.
5231The operation is performed according to the IEC/IEEE Standard for Binary
5232Floating-Point Arithmetic.
5233-------------------------------------------------------------------------------
5234*/
5235float128 float128_sqrt( float128 a )
5236{
5237    flag aSign;
5238    int32 aExp, zExp;
5239    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5240    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5241    float128 z;
5242
5243    aSig1 = extractFloat128Frac1( a );
5244    aSig0 = extractFloat128Frac0( a );
5245    aExp = extractFloat128Exp( a );
5246    aSign = extractFloat128Sign( a );
5247    if ( aExp == 0x7FFF ) {
5248        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
5249        if ( ! aSign ) return a;
5250        goto invalid;
5251    }
5252    if ( aSign ) {
5253        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5254 invalid:
5255        float_raise( float_flag_invalid );
5256        z.low = float128_default_nan_low;
5257        z.high = float128_default_nan_high;
5258        return z;
5259    }
5260    if ( aExp == 0 ) {
5261        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5262        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5263    }
5264    zExp = (int32) ( (bits32)(aExp - 0x3FFF) >> 1) + 0x3FFE;
5265    aSig0 |= LIT64( 0x0001000000000000 );
5266    zSig0 = estimateSqrt32((int16)aExp, (bits32)(aSig0>>17));
5267    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5268    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5269    doubleZSig0 = zSig0<<1;
5270    mul64To128( zSig0, zSig0, &term0, &term1 );
5271    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5272    while ( (sbits64) rem0 < 0 ) {
5273        --zSig0;
5274        doubleZSig0 -= 2;
5275        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5276    }
5277    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5278    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5279        if ( zSig1 == 0 ) zSig1 = 1;
5280        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5281        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5282        mul64To128( zSig1, zSig1, &term2, &term3 );
5283        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5284        while ( (sbits64) rem1 < 0 ) {
5285            --zSig1;
5286            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5287            term3 |= 1;
5288            term2 |= doubleZSig0;
5289            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5290        }
5291        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5292    }
5293    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5294    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
5295
5296}
5297
5298/*
5299-------------------------------------------------------------------------------
5300Returns 1 if the quadruple-precision floating-point value `a' is equal to
5301the corresponding value `b', and 0 otherwise.  The comparison is performed
5302according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5303-------------------------------------------------------------------------------
5304*/
5305flag float128_eq( float128 a, float128 b )
5306{
5307
5308    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5309              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5310         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5311              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5312       ) {
5313        if (    float128_is_signaling_nan( a )
5314             || float128_is_signaling_nan( b ) ) {
5315            float_raise( float_flag_invalid );
5316        }
5317        return 0;
5318    }
5319    return
5320           ( a.low == b.low )
5321        && (    ( a.high == b.high )
5322             || (    ( a.low == 0 )
5323                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5324           );
5325
5326}
5327
5328/*
5329-------------------------------------------------------------------------------
5330Returns 1 if the quadruple-precision floating-point value `a' is less than
5331or equal to the corresponding value `b', and 0 otherwise.  The comparison
5332is performed according to the IEC/IEEE Standard for Binary Floating-Point
5333Arithmetic.
5334-------------------------------------------------------------------------------
5335*/
5336flag float128_le( float128 a, float128 b )
5337{
5338    flag aSign, bSign;
5339
5340    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5341              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5342         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5343              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5344       ) {
5345        float_raise( float_flag_invalid );
5346        return 0;
5347    }
5348    aSign = extractFloat128Sign( a );
5349    bSign = extractFloat128Sign( b );
5350    if ( aSign != bSign ) {
5351        return
5352               aSign
5353            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5354                 == 0 );
5355    }
5356    return
5357          aSign ? le128( b.high, b.low, a.high, a.low )
5358        : le128( a.high, a.low, b.high, b.low );
5359
5360}
5361
5362/*
5363-------------------------------------------------------------------------------
5364Returns 1 if the quadruple-precision floating-point value `a' is less than
5365the corresponding value `b', and 0 otherwise.  The comparison is performed
5366according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5367-------------------------------------------------------------------------------
5368*/
5369flag float128_lt( float128 a, float128 b )
5370{
5371    flag aSign, bSign;
5372
5373    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5374              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5375         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5376              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5377       ) {
5378        float_raise( float_flag_invalid );
5379        return 0;
5380    }
5381    aSign = extractFloat128Sign( a );
5382    bSign = extractFloat128Sign( b );
5383    if ( aSign != bSign ) {
5384        return
5385               aSign
5386            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5387                 != 0 );
5388    }
5389    return
5390          aSign ? lt128( b.high, b.low, a.high, a.low )
5391        : lt128( a.high, a.low, b.high, b.low );
5392
5393}
5394
5395/*
5396-------------------------------------------------------------------------------
5397Returns 1 if the quadruple-precision floating-point value `a' is equal to
5398the corresponding value `b', and 0 otherwise.  The invalid exception is
5399raised if either operand is a NaN.  Otherwise, the comparison is performed
5400according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5401-------------------------------------------------------------------------------
5402*/
5403flag float128_eq_signaling( float128 a, float128 b )
5404{
5405
5406    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5407              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5408         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5409              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5410       ) {
5411        float_raise( float_flag_invalid );
5412        return 0;
5413    }
5414    return
5415           ( a.low == b.low )
5416        && (    ( a.high == b.high )
5417             || (    ( a.low == 0 )
5418                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5419           );
5420
5421}
5422
5423/*
5424-------------------------------------------------------------------------------
5425Returns 1 if the quadruple-precision floating-point value `a' is less than
5426or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5427cause an exception.  Otherwise, the comparison is performed according to the
5428IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5429-------------------------------------------------------------------------------
5430*/
5431flag float128_le_quiet( float128 a, float128 b )
5432{
5433    flag aSign, bSign;
5434
5435    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5436              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5437         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5438              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5439       ) {
5440        if (    float128_is_signaling_nan( a )
5441             || float128_is_signaling_nan( b ) ) {
5442            float_raise( float_flag_invalid );
5443        }
5444        return 0;
5445    }
5446    aSign = extractFloat128Sign( a );
5447    bSign = extractFloat128Sign( b );
5448    if ( aSign != bSign ) {
5449        return
5450               aSign
5451            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5452                 == 0 );
5453    }
5454    return
5455          aSign ? le128( b.high, b.low, a.high, a.low )
5456        : le128( a.high, a.low, b.high, b.low );
5457
5458}
5459
5460/*
5461-------------------------------------------------------------------------------
5462Returns 1 if the quadruple-precision floating-point value `a' is less than
5463the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5464exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5465Standard for Binary Floating-Point Arithmetic.
5466-------------------------------------------------------------------------------
5467*/
5468flag float128_lt_quiet( float128 a, float128 b )
5469{
5470    flag aSign, bSign;
5471
5472    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5473              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5474         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5475              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5476       ) {
5477        if (    float128_is_signaling_nan( a )
5478             || float128_is_signaling_nan( b ) ) {
5479            float_raise( float_flag_invalid );
5480        }
5481        return 0;
5482    }
5483    aSign = extractFloat128Sign( a );
5484    bSign = extractFloat128Sign( b );
5485    if ( aSign != bSign ) {
5486        return
5487               aSign
5488            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5489                 != 0 );
5490    }
5491    return
5492          aSign ? lt128( b.high, b.low, a.high, a.low )
5493        : lt128( a.high, a.low, b.high, b.low );
5494
5495}
5496
5497#endif
5498
5499
5500#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5501
5502/*
5503 * These two routines are not part of the original softfloat distribution.
5504 *
5505 * They are based on the corresponding conversions to integer but return
5506 * unsigned numbers instead since these functions are required by GCC.
5507 *
5508 * Added by Mark Brinicombe <mark@NetBSD.org>	27/09/97
5509 *
5510 * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
5511 */
5512
5513/*
5514-------------------------------------------------------------------------------
5515Returns the result of converting the double-precision floating-point value
5516`a' to the 32-bit unsigned integer format.  The conversion is
5517performed according to the IEC/IEEE Standard for Binary Floating-point
5518Arithmetic, except that the conversion is always rounded toward zero.  If
5519`a' is a NaN, the largest positive integer is returned.  If the conversion
5520overflows, the largest integer positive is returned.
5521-------------------------------------------------------------------------------
5522*/
5523uint32 float64_to_uint32_round_to_zero( float64 a )
5524{
5525    flag aSign;
5526    int16 aExp, shiftCount;
5527    bits64 aSig, savedASig;
5528    uint32 z;
5529
5530    aSig = extractFloat64Frac( a );
5531    aExp = extractFloat64Exp( a );
5532    aSign = extractFloat64Sign( a );
5533
5534    if (aSign) {
5535        float_raise( float_flag_invalid );
5536    	return(0);
5537    }
5538
5539    if ( 0x41E < aExp ) {
5540        float_raise( float_flag_invalid );
5541        return 0xffffffff;
5542    }
5543    else if ( aExp < 0x3FF ) {
5544        if ( aExp || aSig ) set_float_exception_inexact_flag();
5545        return 0;
5546    }
5547    aSig |= LIT64( 0x0010000000000000 );
5548    shiftCount = 0x433 - aExp;
5549    savedASig = aSig;
5550    aSig >>= shiftCount;
5551    z = (uint32)aSig;
5552    if ( ( aSig<<shiftCount ) != savedASig ) {
5553        set_float_exception_inexact_flag();
5554    }
5555    return z;
5556
5557}
5558
5559/*
5560-------------------------------------------------------------------------------
5561Returns the result of converting the single-precision floating-point value
5562`a' to the 32-bit unsigned integer format.  The conversion is
5563performed according to the IEC/IEEE Standard for Binary Floating-point
5564Arithmetic, except that the conversion is always rounded toward zero.  If
5565`a' is a NaN, the largest positive integer is returned.  If the conversion
5566overflows, the largest positive integer is returned.
5567-------------------------------------------------------------------------------
5568*/
5569uint32 float32_to_uint32_round_to_zero( float32 a )
5570{
5571    flag aSign;
5572    int16 aExp, shiftCount;
5573    bits32 aSig;
5574    uint32 z;
5575
5576    aSig = extractFloat32Frac( a );
5577    aExp = extractFloat32Exp( a );
5578    aSign = extractFloat32Sign( a );
5579    shiftCount = aExp - 0x9E;
5580
5581    if (aSign) {
5582        float_raise( float_flag_invalid );
5583    	return(0);
5584    }
5585    if ( 0 < shiftCount ) {
5586        float_raise( float_flag_invalid );
5587        return 0xFFFFFFFF;
5588    }
5589    else if ( aExp <= 0x7E ) {
5590        if ( aExp | aSig ) set_float_exception_inexact_flag();
5591        return 0;
5592    }
5593    aSig = ( aSig | 0x800000 )<<8;
5594    z = aSig>>( - shiftCount );
5595    if ( aSig<<( shiftCount & 31 ) ) {
5596        set_float_exception_inexact_flag();
5597    }
5598    return z;
5599
5600}
5601
5602#endif
5603