1/*	$OpenBSD: softfloat.c,v 1.6 2014/07/01 20:21:17 miod Exp $	*/
2/*	$NetBSD: softfloat.c,v 1.1 2001/04/26 03:10:47 ross Exp $	*/
3
4/*
5 * This version hacked for use with gcc -msoft-float by bjh21.
6 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
7 *  itself).
8 */
9
10/*
11 * Things you may want to define:
12 *
13 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
14 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
15 *   properly renamed.
16 */
17
18/*
19===============================================================================
20
21This C source file is part of the SoftFloat IEC/IEEE Floating-point
22Arithmetic Package, Release 2a.
23
24Written by John R. Hauser.  This work was made possible in part by the
25International Computer Science Institute, located at Suite 600, 1947 Center
26Street, Berkeley, California 94704.  Funding was partially provided by the
27National Science Foundation under grant MIP-9311980.  The original version
28of this code was written as part of a project to build a fixed-point vector
29processor in collaboration with the University of California at Berkeley,
30overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
31is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
32arithmetic/SoftFloat.html'.
33
34THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable
35effort has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT
36WILL AT TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS
37RESTRICTED TO PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL
38RESPONSIBILITY FOR ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM
39THEIR OWN USE OF THE SOFTWARE, AND WHO ALSO EFFECTIVELY INDEMNIFY
40(possibly via similar legal warning) JOHN HAUSER AND THE INTERNATIONAL
41COMPUTER SCIENCE INSTITUTE AGAINST ALL LOSSES, COSTS, OR OTHER PROBLEMS
42ARISING FROM THE USE OF THE SOFTWARE BY THEIR CUSTOMERS AND CLIENTS.
43
44Derivative works are acceptable, even for commercial purposes, so long as
45(1) they include prominent notice that the work is derivative, and (2) they
46include prominent notice akin to these four paragraphs for those parts of
47this code that are retained.
48
49===============================================================================
50*/
51
52#ifdef SOFTFLOAT_FOR_GCC
53#include "softfloat-for-gcc.h"
54#endif
55
56#include "milieu.h"
57#include "softfloat.h"
58
59float32 normalizeRoundAndPackFloat32(flag, int16, bits32);
60float64 normalizeRoundAndPackFloat64(flag, int16, bits64);
61
62/*
63 * Conversions between floats as stored in memory and floats as
64 * SoftFloat uses them
65 */
66#ifndef FLOAT64_DEMANGLE
67#define FLOAT64_DEMANGLE(a)	(a)
68#endif
69#ifndef FLOAT64_MANGLE
70#define FLOAT64_MANGLE(a)	(a)
71#endif
72
73/*
74-------------------------------------------------------------------------------
75Floating-point rounding mode, extended double-precision rounding precision,
76and exception flags.
77-------------------------------------------------------------------------------
78*/
79
80/*
81 * XXX: This may cause options-MULTIPROCESSOR or thread problems someday.
82 * 	Right now, it does not.  I've removed all other dynamic global
83 * 	variables. [ross]
84 */
85#ifdef FLOATX80
86int8 floatx80_rounding_precision = 80;
87#endif
88
89/*
90-------------------------------------------------------------------------------
91Primitive arithmetic functions, including multi-word arithmetic, and
92division and square root approximations.  (Can be specialized to target if
93desired.)
94-------------------------------------------------------------------------------
95*/
96#include "softfloat-macros.h"
97
98/*
99-------------------------------------------------------------------------------
100Functions and definitions to determine:  (1) whether tininess for underflow
101is detected before or after rounding by default, (2) what (if anything)
102happens when exceptions are raised, (3) how signaling NaNs are distinguished
103from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
104are propagated from function inputs to output.  These details are target-
105specific.
106-------------------------------------------------------------------------------
107*/
108#include "softfloat-specialize.h"
109
110#ifndef SOFTFLOAT_FOR_GCC /* Not used */
111/*
112-------------------------------------------------------------------------------
113Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
114and 7, and returns the properly rounded 32-bit integer corresponding to the
115input.  If `zSign' is 1, the input is negated before being converted to an
116integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
117is simply rounded to an integer, with the inexact exception raised if the
118input cannot be represented exactly as an integer.  However, if the fixed-
119point input is too large, the invalid exception is raised and the largest
120positive or negative integer is returned.
121-------------------------------------------------------------------------------
122*/
123static int32 roundAndPackInt32( flag zSign, bits64 absZ )
124{
125    int8 roundingMode;
126    flag roundNearestEven;
127    int8 roundIncrement, roundBits;
128    int32 z;
129
130    roundingMode = float_rounding_mode();
131    roundNearestEven = ( roundingMode == float_round_nearest_even );
132    roundIncrement = 0x40;
133    if ( ! roundNearestEven ) {
134        if ( roundingMode == float_round_to_zero ) {
135            roundIncrement = 0;
136        }
137        else {
138            roundIncrement = 0x7F;
139            if ( zSign ) {
140                if ( roundingMode == float_round_up ) roundIncrement = 0;
141            }
142            else {
143                if ( roundingMode == float_round_down ) roundIncrement = 0;
144            }
145        }
146    }
147    roundBits = absZ & 0x7F;
148    absZ = ( absZ + roundIncrement )>>7;
149    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
150    z = absZ;
151    if ( zSign ) z = - z;
152    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
153        float_raise( float_flag_invalid );
154        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
155    }
156    if ( roundBits ) float_set_inexact();
157    return z;
158
159}
160
161/*
162-------------------------------------------------------------------------------
163Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
164`absZ1', with binary point between bits 63 and 64 (between the input words),
165and returns the properly rounded 64-bit integer corresponding to the input.
166If `zSign' is 1, the input is negated before being converted to an integer.
167Ordinarily, the fixed-point input is simply rounded to an integer, with
168the inexact exception raised if the input cannot be represented exactly as
169an integer.  However, if the fixed-point input is too large, the invalid
170exception is raised and the largest positive or negative integer is
171returned.
172-------------------------------------------------------------------------------
173*/
174static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
175{
176    int8 roundingMode;
177    flag roundNearestEven, increment;
178    int64 z;
179
180    roundingMode = float_rounding_mode();
181    roundNearestEven = ( roundingMode == float_round_nearest_even );
182    increment = ( (sbits64) absZ1 < 0 );
183    if ( ! roundNearestEven ) {
184        if ( roundingMode == float_round_to_zero ) {
185            increment = 0;
186        }
187        else {
188            if ( zSign ) {
189                increment = ( roundingMode == float_round_down ) && absZ1;
190            }
191            else {
192                increment = ( roundingMode == float_round_up ) && absZ1;
193            }
194        }
195    }
196    if ( increment ) {
197        ++absZ0;
198        if ( absZ0 == 0 ) goto overflow;
199        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
200    }
201    z = absZ0;
202    if ( zSign ) z = - z;
203    if ( z && ( ( z < 0 ) ^ zSign ) ) {
204 overflow:
205        float_raise( float_flag_invalid );
206        return
207              zSign ? (sbits64) LIT64( 0x8000000000000000 )
208            : LIT64( 0x7FFFFFFFFFFFFFFF );
209    }
210    if ( absZ1 ) float_set_inexact();
211    return z;
212
213}
214
215#ifdef __alpha__
216/*
217-------------------------------------------------------------------------------
218Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
219`absZ1', with binary point between bits 63 and 64 (between the input words),
220and returns the properly rounded 64-bit integer corresponding to the input.
221If `zSign' is 1, the input is negated before being converted to an integer.
222Ordinarily, the fixed-point input is simply rounded to an integer, with
223the inexact exception raised if the input cannot be represented exactly as
224an integer.
225-------------------------------------------------------------------------------
226*/
227static int64 roundAndPackInt64NoOverflow( flag zSign, bits64 absZ0,
228    bits64 absZ1 )
229{
230    int8 roundingMode;
231    flag roundNearestEven, increment;
232    int64 z;
233
234    roundingMode = float_rounding_mode();
235    roundNearestEven = ( roundingMode == float_round_nearest_even );
236    increment = ( (sbits64) absZ1 < 0 );
237    if ( ! roundNearestEven ) {
238        if ( roundingMode == float_round_to_zero ) {
239            increment = 0;
240        }
241        else {
242            if ( zSign ) {
243                increment = ( roundingMode == float_round_down ) && absZ1;
244            }
245            else {
246                increment = ( roundingMode == float_round_up ) && absZ1;
247            }
248        }
249    }
250    if ( increment ) {
251        ++absZ0;
252        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
253    }
254    z = absZ0;
255    if ( zSign ) z = - z;
256    if ( absZ1 ) float_set_inexact();
257    return z;
258
259}
260#endif /* __alpha__ */
261#endif
262
263/*
264-------------------------------------------------------------------------------
265Returns the fraction bits of the single-precision floating-point value `a'.
266-------------------------------------------------------------------------------
267*/
268INLINE bits32 extractFloat32Frac( float32 a )
269{
270
271    return a & 0x007FFFFF;
272
273}
274
275/*
276-------------------------------------------------------------------------------
277Returns the exponent bits of the single-precision floating-point value `a'.
278-------------------------------------------------------------------------------
279*/
280INLINE int16 extractFloat32Exp( float32 a )
281{
282
283    return ( a>>23 ) & 0xFF;
284
285}
286
287/*
288-------------------------------------------------------------------------------
289Returns the sign bit of the single-precision floating-point value `a'.
290-------------------------------------------------------------------------------
291*/
292INLINE flag extractFloat32Sign( float32 a )
293{
294
295    return a>>31;
296
297}
298
299/*
300-------------------------------------------------------------------------------
301Normalizes the subnormal single-precision floating-point value represented
302by the denormalized significand `aSig'.  The normalized exponent and
303significand are stored at the locations pointed to by `zExpPtr' and
304`zSigPtr', respectively.
305-------------------------------------------------------------------------------
306*/
307static void
308 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
309{
310    int8 shiftCount;
311
312    shiftCount = countLeadingZeros32( aSig ) - 8;
313    *zSigPtr = aSig<<shiftCount;
314    *zExpPtr = 1 - shiftCount;
315
316}
317
318/*
319-------------------------------------------------------------------------------
320Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
321single-precision floating-point value, returning the result.  After being
322shifted into the proper positions, the three fields are simply added
323together to form the result.  This means that any integer portion of `zSig'
324will be added into the exponent.  Since a properly normalized significand
325will have an integer portion equal to 1, the `zExp' input should be 1 less
326than the desired result exponent whenever `zSig' is a complete, normalized
327significand.
328-------------------------------------------------------------------------------
329*/
330INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
331{
332
333    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
334
335}
336
337/*
338-------------------------------------------------------------------------------
339Takes an abstract floating-point value having sign `zSign', exponent `zExp',
340and significand `zSig', and returns the proper single-precision floating-
341point value corresponding to the abstract input.  Ordinarily, the abstract
342value is simply rounded and packed into the single-precision format, with
343the inexact exception raised if the abstract input cannot be represented
344exactly.  However, if the abstract value is too large, the overflow and
345inexact exceptions are raised and an infinity or maximal finite value is
346returned.  If the abstract value is too small, the input value is rounded to
347a subnormal number, and the underflow and inexact exceptions are raised if
348the abstract input cannot be represented exactly as a subnormal single-
349precision floating-point number.
350    The input significand `zSig' has its binary point between bits 30
351and 29, which is 7 bits to the left of the usual location.  This shifted
352significand must be normalized or smaller.  If `zSig' is not normalized,
353`zExp' must be 0; in that case, the result returned is a subnormal number,
354and it must not require rounding.  In the usual case that `zSig' is
355normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
356The handling of underflow and overflow follows the IEC/IEEE Standard for
357Binary Floating-Point Arithmetic.
358-------------------------------------------------------------------------------
359*/
360static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
361{
362    int8 roundingMode;
363    flag roundNearestEven;
364    int8 roundIncrement, roundBits;
365    flag isTiny;
366
367    roundingMode = float_rounding_mode();
368    roundNearestEven = ( roundingMode == float_round_nearest_even );
369    roundIncrement = 0x40;
370    if ( ! roundNearestEven ) {
371        if ( roundingMode == float_round_to_zero ) {
372            roundIncrement = 0;
373        }
374        else {
375            roundIncrement = 0x7F;
376            if ( zSign ) {
377                if ( roundingMode == float_round_up ) roundIncrement = 0;
378            }
379            else {
380                if ( roundingMode == float_round_down ) roundIncrement = 0;
381            }
382        }
383    }
384    roundBits = zSig & 0x7F;
385    if ( 0xFD <= (bits16) zExp ) {
386        if (    ( 0xFD < zExp )
387             || (    ( zExp == 0xFD )
388                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
389           ) {
390            float_raise( float_flag_overflow | float_flag_inexact );
391            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
392        }
393        if ( zExp < 0 ) {
394            isTiny =
395                   ( float_detect_tininess == float_tininess_before_rounding )
396                || ( zExp < -1 )
397                || ( zSig + roundIncrement < 0x80000000 );
398            shift32RightJamming( zSig, - zExp, &zSig );
399            zExp = 0;
400            roundBits = zSig & 0x7F;
401            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
402        }
403    }
404    if ( roundBits ) float_set_inexact();
405    zSig = ( zSig + roundIncrement )>>7;
406    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
407    if ( zSig == 0 ) zExp = 0;
408    return packFloat32( zSign, zExp, zSig );
409
410}
411
412/*
413-------------------------------------------------------------------------------
414Takes an abstract floating-point value having sign `zSign', exponent `zExp',
415and significand `zSig', and returns the proper single-precision floating-
416point value corresponding to the abstract input.  This routine is just like
417`roundAndPackFloat32' except that `zSig' does not have to be normalized.
418Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
419floating-point exponent.
420-------------------------------------------------------------------------------
421*/
422float32
423 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
424{
425    int8 shiftCount;
426
427    shiftCount = countLeadingZeros32( zSig ) - 1;
428    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
429
430}
431
432/*
433-------------------------------------------------------------------------------
434Returns the fraction bits of the double-precision floating-point value `a'.
435-------------------------------------------------------------------------------
436*/
437INLINE bits64 extractFloat64Frac( float64 a )
438{
439
440    return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
441
442}
443
444/*
445-------------------------------------------------------------------------------
446Returns the exponent bits of the double-precision floating-point value `a'.
447-------------------------------------------------------------------------------
448*/
449INLINE int16 extractFloat64Exp( float64 a )
450{
451
452    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
453
454}
455
456/*
457-------------------------------------------------------------------------------
458Returns the sign bit of the double-precision floating-point value `a'.
459-------------------------------------------------------------------------------
460*/
461INLINE flag extractFloat64Sign( float64 a )
462{
463
464    return FLOAT64_DEMANGLE(a)>>63;
465
466}
467
468/*
469-------------------------------------------------------------------------------
470Normalizes the subnormal double-precision floating-point value represented
471by the denormalized significand `aSig'.  The normalized exponent and
472significand are stored at the locations pointed to by `zExpPtr' and
473`zSigPtr', respectively.
474-------------------------------------------------------------------------------
475*/
476static void
477 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
478{
479    int8 shiftCount;
480
481    shiftCount = countLeadingZeros64( aSig ) - 11;
482    *zSigPtr = aSig<<shiftCount;
483    *zExpPtr = 1 - shiftCount;
484
485}
486
487/*
488-------------------------------------------------------------------------------
489Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
490double-precision floating-point value, returning the result.  After being
491shifted into the proper positions, the three fields are simply added
492together to form the result.  This means that any integer portion of `zSig'
493will be added into the exponent.  Since a properly normalized significand
494will have an integer portion equal to 1, the `zExp' input should be 1 less
495than the desired result exponent whenever `zSig' is a complete, normalized
496significand.
497-------------------------------------------------------------------------------
498*/
499INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
500{
501
502    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
503			   ( ( (bits64) zExp )<<52 ) + zSig );
504
505}
506
507/*
508-------------------------------------------------------------------------------
509Takes an abstract floating-point value having sign `zSign', exponent `zExp',
510and significand `zSig', and returns the proper double-precision floating-
511point value corresponding to the abstract input.  Ordinarily, the abstract
512value is simply rounded and packed into the double-precision format, with
513the inexact exception raised if the abstract input cannot be represented
514exactly.  However, if the abstract value is too large, the overflow and
515inexact exceptions are raised and an infinity or maximal finite value is
516returned.  If the abstract value is too small, the input value is rounded to
517a subnormal number, and the underflow and inexact exceptions are raised if
518the abstract input cannot be represented exactly as a subnormal double-
519precision floating-point number.
520    The input significand `zSig' has its binary point between bits 62
521and 61, which is 10 bits to the left of the usual location.  This shifted
522significand must be normalized or smaller.  If `zSig' is not normalized,
523`zExp' must be 0; in that case, the result returned is a subnormal number,
524and it must not require rounding.  In the usual case that `zSig' is
525normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
526The handling of underflow and overflow follows the IEC/IEEE Standard for
527Binary Floating-Point Arithmetic.
528-------------------------------------------------------------------------------
529*/
530static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
531{
532    int8 roundingMode;
533    flag roundNearestEven;
534    int16 roundIncrement, roundBits;
535    flag isTiny;
536
537    roundingMode = float_rounding_mode();
538    roundNearestEven = ( roundingMode == float_round_nearest_even );
539    roundIncrement = 0x200;
540    if ( ! roundNearestEven ) {
541        if ( roundingMode == float_round_to_zero ) {
542            roundIncrement = 0;
543        }
544        else {
545            roundIncrement = 0x3FF;
546            if ( zSign ) {
547                if ( roundingMode == float_round_up ) roundIncrement = 0;
548            }
549            else {
550                if ( roundingMode == float_round_down ) roundIncrement = 0;
551            }
552        }
553    }
554    roundBits = zSig & 0x3FF;
555    if ( 0x7FD <= (bits16) zExp ) {
556        if (    ( 0x7FD < zExp )
557             || (    ( zExp == 0x7FD )
558                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
559           ) {
560            float_raise( float_flag_overflow | float_flag_inexact );
561            return FLOAT64_MANGLE(
562		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
563		( roundIncrement == 0 ));
564        }
565        if ( zExp < 0 ) {
566            isTiny =
567                   ( float_detect_tininess == float_tininess_before_rounding )
568                || ( zExp < -1 )
569                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
570            shift64RightJamming( zSig, - zExp, &zSig );
571            zExp = 0;
572            roundBits = zSig & 0x3FF;
573            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
574        }
575    }
576    if ( roundBits ) float_set_inexact();
577    zSig = ( zSig + roundIncrement )>>10;
578    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
579    if ( zSig == 0 ) zExp = 0;
580    return packFloat64( zSign, zExp, zSig );
581
582}
583
584/*
585-------------------------------------------------------------------------------
586Takes an abstract floating-point value having sign `zSign', exponent `zExp',
587and significand `zSig', and returns the proper double-precision floating-
588point value corresponding to the abstract input.  This routine is just like
589`roundAndPackFloat64' except that `zSig' does not have to be normalized.
590Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
591floating-point exponent.
592-------------------------------------------------------------------------------
593*/
594float64
595 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
596{
597    int8 shiftCount;
598
599    shiftCount = countLeadingZeros64( zSig ) - 1;
600    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
601
602}
603
604#ifdef FLOATX80
605
606/*
607-------------------------------------------------------------------------------
608Returns the fraction bits of the extended double-precision floating-point
609value `a'.
610-------------------------------------------------------------------------------
611*/
612INLINE bits64 extractFloatx80Frac( floatx80 a )
613{
614
615    return a.low;
616
617}
618
619/*
620-------------------------------------------------------------------------------
621Returns the exponent bits of the extended double-precision floating-point
622value `a'.
623-------------------------------------------------------------------------------
624*/
625INLINE int32 extractFloatx80Exp( floatx80 a )
626{
627
628    return a.high & 0x7FFF;
629
630}
631
632/*
633-------------------------------------------------------------------------------
634Returns the sign bit of the extended double-precision floating-point value
635`a'.
636-------------------------------------------------------------------------------
637*/
638INLINE flag extractFloatx80Sign( floatx80 a )
639{
640
641    return a.high>>15;
642
643}
644
645/*
646-------------------------------------------------------------------------------
647Normalizes the subnormal extended double-precision floating-point value
648represented by the denormalized significand `aSig'.  The normalized exponent
649and significand are stored at the locations pointed to by `zExpPtr' and
650`zSigPtr', respectively.
651-------------------------------------------------------------------------------
652*/
653static void
654 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
655{
656    int8 shiftCount;
657
658    shiftCount = countLeadingZeros64( aSig );
659    *zSigPtr = aSig<<shiftCount;
660    *zExpPtr = 1 - shiftCount;
661
662}
663
664/*
665-------------------------------------------------------------------------------
666Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
667extended double-precision floating-point value, returning the result.
668-------------------------------------------------------------------------------
669*/
670INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
671{
672    floatx80 z;
673
674    z.low = zSig;
675    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
676    return z;
677
678}
679
680/*
681-------------------------------------------------------------------------------
682Takes an abstract floating-point value having sign `zSign', exponent `zExp',
683and extended significand formed by the concatenation of `zSig0' and `zSig1',
684and returns the proper extended double-precision floating-point value
685corresponding to the abstract input.  Ordinarily, the abstract value is
686rounded and packed into the extended double-precision format, with the
687inexact exception raised if the abstract input cannot be represented
688exactly.  However, if the abstract value is too large, the overflow and
689inexact exceptions are raised and an infinity or maximal finite value is
690returned.  If the abstract value is too small, the input value is rounded to
691a subnormal number, and the underflow and inexact exceptions are raised if
692the abstract input cannot be represented exactly as a subnormal extended
693double-precision floating-point number.
694    If `roundingPrecision' is 32 or 64, the result is rounded to the same
695number of bits as single or double precision, respectively.  Otherwise, the
696result is rounded to the full precision of the extended double-precision
697format.
698    The input significand must be normalized or smaller.  If the input
699significand is not normalized, `zExp' must be 0; in that case, the result
700returned is a subnormal number, and it must not require rounding.  The
701handling of underflow and overflow follows the IEC/IEEE Standard for Binary
702Floating-Point Arithmetic.
703-------------------------------------------------------------------------------
704*/
705static floatx80
706 roundAndPackFloatx80(
707     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
708 )
709{
710    int8 roundingMode;
711    flag roundNearestEven, increment, isTiny;
712    int64 roundIncrement, roundMask, roundBits;
713
714    roundingMode = float_rounding_mode();
715    roundNearestEven = ( roundingMode == float_round_nearest_even );
716    if ( roundingPrecision == 80 ) goto precision80;
717    if ( roundingPrecision == 64 ) {
718        roundIncrement = LIT64( 0x0000000000000400 );
719        roundMask = LIT64( 0x00000000000007FF );
720    }
721    else if ( roundingPrecision == 32 ) {
722        roundIncrement = LIT64( 0x0000008000000000 );
723        roundMask = LIT64( 0x000000FFFFFFFFFF );
724    }
725    else {
726        goto precision80;
727    }
728    zSig0 |= ( zSig1 != 0 );
729    if ( ! roundNearestEven ) {
730        if ( roundingMode == float_round_to_zero ) {
731            roundIncrement = 0;
732        }
733        else {
734            roundIncrement = roundMask;
735            if ( zSign ) {
736                if ( roundingMode == float_round_up ) roundIncrement = 0;
737            }
738            else {
739                if ( roundingMode == float_round_down ) roundIncrement = 0;
740            }
741        }
742    }
743    roundBits = zSig0 & roundMask;
744    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
745        if (    ( 0x7FFE < zExp )
746             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
747           ) {
748            goto overflow;
749        }
750        if ( zExp <= 0 ) {
751            isTiny =
752                   ( float_detect_tininess == float_tininess_before_rounding )
753                || ( zExp < 0 )
754                || ( zSig0 <= zSig0 + roundIncrement );
755            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
756            zExp = 0;
757            roundBits = zSig0 & roundMask;
758            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
759            if ( roundBits ) float_set_inexact();
760            zSig0 += roundIncrement;
761            if ( (sbits64) zSig0 < 0 ) zExp = 1;
762            roundIncrement = roundMask + 1;
763            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
764                roundMask |= roundIncrement;
765            }
766            zSig0 &= ~ roundMask;
767            return packFloatx80( zSign, zExp, zSig0 );
768        }
769    }
770    if ( roundBits ) float_set_inexact();
771    zSig0 += roundIncrement;
772    if ( zSig0 < roundIncrement ) {
773        ++zExp;
774        zSig0 = LIT64( 0x8000000000000000 );
775    }
776    roundIncrement = roundMask + 1;
777    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
778        roundMask |= roundIncrement;
779    }
780    zSig0 &= ~ roundMask;
781    if ( zSig0 == 0 ) zExp = 0;
782    return packFloatx80( zSign, zExp, zSig0 );
783 precision80:
784    increment = ( (sbits64) zSig1 < 0 );
785    if ( ! roundNearestEven ) {
786        if ( roundingMode == float_round_to_zero ) {
787            increment = 0;
788        }
789        else {
790            if ( zSign ) {
791                increment = ( roundingMode == float_round_down ) && zSig1;
792            }
793            else {
794                increment = ( roundingMode == float_round_up ) && zSig1;
795            }
796        }
797    }
798    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
799        if (    ( 0x7FFE < zExp )
800             || (    ( zExp == 0x7FFE )
801                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
802                  && increment
803                )
804           ) {
805            roundMask = 0;
806 overflow:
807            float_raise( float_flag_overflow | float_flag_inexact );
808            if (    ( roundingMode == float_round_to_zero )
809                 || ( zSign && ( roundingMode == float_round_up ) )
810                 || ( ! zSign && ( roundingMode == float_round_down ) )
811               ) {
812                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
813            }
814            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
815        }
816        if ( zExp <= 0 ) {
817            isTiny =
818                   ( float_detect_tininess == float_tininess_before_rounding )
819                || ( zExp < 0 )
820                || ! increment
821                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
822            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
823            zExp = 0;
824            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
825            if ( zSig1 ) float_set_inexact();
826            if ( roundNearestEven ) {
827                increment = ( (sbits64) zSig1 < 0 );
828            }
829            else {
830                if ( zSign ) {
831                    increment = ( roundingMode == float_round_down ) && zSig1;
832                }
833                else {
834                    increment = ( roundingMode == float_round_up ) && zSig1;
835                }
836            }
837            if ( increment ) {
838                ++zSig0;
839                zSig0 &=
840                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
841                if ( (sbits64) zSig0 < 0 ) zExp = 1;
842            }
843            return packFloatx80( zSign, zExp, zSig0 );
844        }
845    }
846    if ( zSig1 ) float_set_inexact();
847    if ( increment ) {
848        ++zSig0;
849        if ( zSig0 == 0 ) {
850            ++zExp;
851            zSig0 = LIT64( 0x8000000000000000 );
852        }
853        else {
854            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
855        }
856    }
857    else {
858        if ( zSig0 == 0 ) zExp = 0;
859    }
860    return packFloatx80( zSign, zExp, zSig0 );
861
862}
863
864/*
865-------------------------------------------------------------------------------
866Takes an abstract floating-point value having sign `zSign', exponent
867`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
868and returns the proper extended double-precision floating-point value
869corresponding to the abstract input.  This routine is just like
870`roundAndPackFloatx80' except that the input significand does not have to be
871normalized.
872-------------------------------------------------------------------------------
873*/
874static floatx80
875 normalizeRoundAndPackFloatx80(
876     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
877 )
878{
879    int8 shiftCount;
880
881    if ( zSig0 == 0 ) {
882        zSig0 = zSig1;
883        zSig1 = 0;
884        zExp -= 64;
885    }
886    shiftCount = countLeadingZeros64( zSig0 );
887    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
888    zExp -= shiftCount;
889    return
890        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
891
892}
893
894#endif
895
896#ifdef FLOAT128
897
898/*
899-------------------------------------------------------------------------------
900Returns the least-significant 64 fraction bits of the quadruple-precision
901floating-point value `a'.
902-------------------------------------------------------------------------------
903*/
904INLINE bits64 extractFloat128Frac1( float128 a )
905{
906
907    return a.low;
908
909}
910
911/*
912-------------------------------------------------------------------------------
913Returns the most-significant 48 fraction bits of the quadruple-precision
914floating-point value `a'.
915-------------------------------------------------------------------------------
916*/
917INLINE bits64 extractFloat128Frac0( float128 a )
918{
919
920    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
921
922}
923
924/*
925-------------------------------------------------------------------------------
926Returns the exponent bits of the quadruple-precision floating-point value
927`a'.
928-------------------------------------------------------------------------------
929*/
930INLINE int32 extractFloat128Exp( float128 a )
931{
932
933    return ( a.high>>48 ) & 0x7FFF;
934
935}
936
937/*
938-------------------------------------------------------------------------------
939Returns the sign bit of the quadruple-precision floating-point value `a'.
940-------------------------------------------------------------------------------
941*/
942INLINE flag extractFloat128Sign( float128 a )
943{
944
945    return a.high>>63;
946
947}
948
949/*
950-------------------------------------------------------------------------------
951Normalizes the subnormal quadruple-precision floating-point value
952represented by the denormalized significand formed by the concatenation of
953`aSig0' and `aSig1'.  The normalized exponent is stored at the location
954pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
955significand are stored at the location pointed to by `zSig0Ptr', and the
956least significant 64 bits of the normalized significand are stored at the
957location pointed to by `zSig1Ptr'.
958-------------------------------------------------------------------------------
959*/
960static void
961 normalizeFloat128Subnormal(
962     bits64 aSig0,
963     bits64 aSig1,
964     int32 *zExpPtr,
965     bits64 *zSig0Ptr,
966     bits64 *zSig1Ptr
967 )
968{
969    int8 shiftCount;
970
971    if ( aSig0 == 0 ) {
972        shiftCount = countLeadingZeros64( aSig1 ) - 15;
973        if ( shiftCount < 0 ) {
974            *zSig0Ptr = aSig1>>( - shiftCount );
975            *zSig1Ptr = aSig1<<( shiftCount & 63 );
976        }
977        else {
978            *zSig0Ptr = aSig1<<shiftCount;
979            *zSig1Ptr = 0;
980        }
981        *zExpPtr = - shiftCount - 63;
982    }
983    else {
984        shiftCount = countLeadingZeros64( aSig0 ) - 15;
985        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
986        *zExpPtr = 1 - shiftCount;
987    }
988
989}
990
991/*
992-------------------------------------------------------------------------------
993Packs the sign `zSign', the exponent `zExp', and the significand formed
994by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
995floating-point value, returning the result.  After being shifted into the
996proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
997added together to form the most significant 32 bits of the result.  This
998means that any integer portion of `zSig0' will be added into the exponent.
999Since a properly normalized significand will have an integer portion equal
1000to 1, the `zExp' input should be 1 less than the desired result exponent
1001whenever `zSig0' and `zSig1' concatenated form a complete, normalized
1002significand.
1003-------------------------------------------------------------------------------
1004*/
1005INLINE float128
1006 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1007{
1008    float128 z;
1009
1010    z.low = zSig1;
1011    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
1012    return z;
1013
1014}
1015
1016/*
1017-------------------------------------------------------------------------------
1018Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1019and extended significand formed by the concatenation of `zSig0', `zSig1',
1020and `zSig2', and returns the proper quadruple-precision floating-point value
1021corresponding to the abstract input.  Ordinarily, the abstract value is
1022simply rounded and packed into the quadruple-precision format, with the
1023inexact exception raised if the abstract input cannot be represented
1024exactly.  However, if the abstract value is too large, the overflow and
1025inexact exceptions are raised and an infinity or maximal finite value is
1026returned.  If the abstract value is too small, the input value is rounded to
1027a subnormal number, and the underflow and inexact exceptions are raised if
1028the abstract input cannot be represented exactly as a subnormal quadruple-
1029precision floating-point number.
1030    The input significand must be normalized or smaller.  If the input
1031significand is not normalized, `zExp' must be 0; in that case, the result
1032returned is a subnormal number, and it must not require rounding.  In the
1033usual case that the input significand is normalized, `zExp' must be 1 less
1034than the ``true'' floating-point exponent.  The handling of underflow and
1035overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1036-------------------------------------------------------------------------------
1037*/
1038static float128
1039 roundAndPackFloat128(
1040     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
1041{
1042    int8 roundingMode;
1043    flag roundNearestEven, increment, isTiny;
1044
1045    roundingMode = float_rounding_mode();
1046    roundNearestEven = ( roundingMode == float_round_nearest_even );
1047    increment = ( (sbits64) zSig2 < 0 );
1048    if ( ! roundNearestEven ) {
1049        if ( roundingMode == float_round_to_zero ) {
1050            increment = 0;
1051        }
1052        else {
1053            if ( zSign ) {
1054                increment = ( roundingMode == float_round_down ) && zSig2;
1055            }
1056            else {
1057                increment = ( roundingMode == float_round_up ) && zSig2;
1058            }
1059        }
1060    }
1061    if ( 0x7FFD <= (bits32) zExp ) {
1062        if (    ( 0x7FFD < zExp )
1063             || (    ( zExp == 0x7FFD )
1064                  && eq128(
1065                         LIT64( 0x0001FFFFFFFFFFFF ),
1066                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1067                         zSig0,
1068                         zSig1
1069                     )
1070                  && increment
1071                )
1072           ) {
1073            float_raise( float_flag_overflow | float_flag_inexact );
1074            if (    ( roundingMode == float_round_to_zero )
1075                 || ( zSign && ( roundingMode == float_round_up ) )
1076                 || ( ! zSign && ( roundingMode == float_round_down ) )
1077               ) {
1078                return
1079                    packFloat128(
1080                        zSign,
1081                        0x7FFE,
1082                        LIT64( 0x0000FFFFFFFFFFFF ),
1083                        LIT64( 0xFFFFFFFFFFFFFFFF )
1084                    );
1085            }
1086            return packFloat128( zSign, 0x7FFF, 0, 0 );
1087        }
1088        if ( zExp < 0 ) {
1089            isTiny =
1090                   ( float_detect_tininess == float_tininess_before_rounding )
1091                || ( zExp < -1 )
1092                || ! increment
1093                || lt128(
1094                       zSig0,
1095                       zSig1,
1096                       LIT64( 0x0001FFFFFFFFFFFF ),
1097                       LIT64( 0xFFFFFFFFFFFFFFFF )
1098                   );
1099            shift128ExtraRightJamming(
1100                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1101            zExp = 0;
1102            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1103            if ( roundNearestEven ) {
1104                increment = ( (sbits64) zSig2 < 0 );
1105            }
1106            else {
1107                if ( zSign ) {
1108                    increment = ( roundingMode == float_round_down ) && zSig2;
1109                }
1110                else {
1111                    increment = ( roundingMode == float_round_up ) && zSig2;
1112                }
1113            }
1114        }
1115    }
1116    if ( zSig2 ) float_set_inexact();
1117    if ( increment ) {
1118        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1119        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1120    }
1121    else {
1122        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1123    }
1124    return packFloat128( zSign, zExp, zSig0, zSig1 );
1125
1126}
1127
1128/*
1129-------------------------------------------------------------------------------
1130Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1131and significand formed by the concatenation of `zSig0' and `zSig1', and
1132returns the proper quadruple-precision floating-point value corresponding
1133to the abstract input.  This routine is just like `roundAndPackFloat128'
1134except that the input significand has fewer bits and does not have to be
1135normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1136point exponent.
1137-------------------------------------------------------------------------------
1138*/
1139static float128
1140 normalizeRoundAndPackFloat128(
1141     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1142{
1143    int8 shiftCount;
1144    bits64 zSig2;
1145
1146    if ( zSig0 == 0 ) {
1147        zSig0 = zSig1;
1148        zSig1 = 0;
1149        zExp -= 64;
1150    }
1151    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1152    if ( 0 <= shiftCount ) {
1153        zSig2 = 0;
1154        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1155    }
1156    else {
1157        shift128ExtraRightJamming(
1158            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1159    }
1160    zExp -= shiftCount;
1161    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1162
1163}
1164
1165#endif
1166
1167/*
1168-------------------------------------------------------------------------------
1169Returns the result of converting the 32-bit two's complement integer `a'
1170to the single-precision floating-point format.  The conversion is performed
1171according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1172-------------------------------------------------------------------------------
1173*/
1174float32 int32_to_float32( int32 a )
1175{
1176    flag zSign;
1177
1178    if ( a == 0 ) return 0;
1179    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1180    zSign = ( a < 0 );
1181    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
1182
1183}
1184
1185/*
1186-------------------------------------------------------------------------------
1187Returns the result of converting the 32-bit two's complement integer `a'
1188to the double-precision floating-point format.  The conversion is performed
1189according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1190-------------------------------------------------------------------------------
1191*/
1192float64 int32_to_float64( int32 a )
1193{
1194    flag zSign;
1195    uint32 absA;
1196    int8 shiftCount;
1197    bits64 zSig;
1198
1199    if ( a == 0 ) return 0;
1200    zSign = ( a < 0 );
1201    absA = zSign ? - a : a;
1202    shiftCount = countLeadingZeros32( absA ) + 21;
1203    zSig = absA;
1204    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1205
1206}
1207
1208#ifdef FLOATX80
1209
1210/*
1211-------------------------------------------------------------------------------
1212Returns the result of converting the 32-bit two's complement integer `a'
1213to the extended double-precision floating-point format.  The conversion
1214is performed according to the IEC/IEEE Standard for Binary Floating-Point
1215Arithmetic.
1216-------------------------------------------------------------------------------
1217*/
1218floatx80 int32_to_floatx80( int32 a )
1219{
1220    flag zSign;
1221    uint32 absA;
1222    int8 shiftCount;
1223    bits64 zSig;
1224
1225    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1226    zSign = ( a < 0 );
1227    absA = zSign ? - a : a;
1228    shiftCount = countLeadingZeros32( absA ) + 32;
1229    zSig = absA;
1230    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1231
1232}
1233
1234#endif
1235
1236#ifdef FLOAT128
1237
1238/*
1239-------------------------------------------------------------------------------
1240Returns the result of converting the 32-bit two's complement integer `a' to
1241the quadruple-precision floating-point format.  The conversion is performed
1242according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1243-------------------------------------------------------------------------------
1244*/
1245float128 int32_to_float128( int32 a )
1246{
1247    flag zSign;
1248    uint32 absA;
1249    int8 shiftCount;
1250    bits64 zSig0;
1251
1252    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1253    zSign = ( a < 0 );
1254    absA = zSign ? - a : a;
1255    shiftCount = countLeadingZeros32( absA ) + 17;
1256    zSig0 = absA;
1257    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1258
1259}
1260
1261#endif
1262
1263#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1264/*
1265-------------------------------------------------------------------------------
1266Returns the result of converting the 64-bit two's complement integer `a'
1267to the single-precision floating-point format.  The conversion is performed
1268according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1269-------------------------------------------------------------------------------
1270*/
1271float32 int64_to_float32( int64 a )
1272{
1273    flag zSign;
1274    uint64 absA;
1275    int8 shiftCount;
1276
1277    if ( a == 0 ) return 0;
1278    zSign = ( a < 0 );
1279    absA = zSign ? - a : a;
1280    shiftCount = countLeadingZeros64( absA ) - 40;
1281    if ( 0 <= shiftCount ) {
1282        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1283    }
1284    else {
1285        shiftCount += 7;
1286        if ( shiftCount < 0 ) {
1287            shift64RightJamming( absA, - shiftCount, &absA );
1288        }
1289        else {
1290            absA <<= shiftCount;
1291        }
1292        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1293    }
1294
1295}
1296
1297/*
1298-------------------------------------------------------------------------------
1299Returns the result of converting the 64-bit two's complement integer `a'
1300to the double-precision floating-point format.  The conversion is performed
1301according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1302-------------------------------------------------------------------------------
1303*/
1304float64 int64_to_float64( int64 a )
1305{
1306    flag zSign;
1307
1308    if ( a == 0 ) return 0;
1309    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1310        return packFloat64( 1, 0x43E, 0 );
1311    }
1312    zSign = ( a < 0 );
1313    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1314
1315}
1316
1317#ifdef FLOATX80
1318
1319/*
1320-------------------------------------------------------------------------------
1321Returns the result of converting the 64-bit two's complement integer `a'
1322to the extended double-precision floating-point format.  The conversion
1323is performed according to the IEC/IEEE Standard for Binary Floating-Point
1324Arithmetic.
1325-------------------------------------------------------------------------------
1326*/
1327floatx80 int64_to_floatx80( int64 a )
1328{
1329    flag zSign;
1330    uint64 absA;
1331    int8 shiftCount;
1332
1333    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1334    zSign = ( a < 0 );
1335    absA = zSign ? - a : a;
1336    shiftCount = countLeadingZeros64( absA );
1337    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1338
1339}
1340
1341#endif
1342
1343#ifdef FLOAT128
1344
1345/*
1346-------------------------------------------------------------------------------
1347Returns the result of converting the 64-bit two's complement integer `a' to
1348the quadruple-precision floating-point format.  The conversion is performed
1349according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1350-------------------------------------------------------------------------------
1351*/
1352float128 int64_to_float128( int64 a )
1353{
1354    flag zSign;
1355    uint64 absA;
1356    int8 shiftCount;
1357    int32 zExp;
1358    bits64 zSig0, zSig1;
1359
1360    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1361    zSign = ( a < 0 );
1362    absA = zSign ? - a : a;
1363    shiftCount = countLeadingZeros64( absA ) + 49;
1364    zExp = 0x406E - shiftCount;
1365    if ( 64 <= shiftCount ) {
1366        zSig1 = 0;
1367        zSig0 = absA;
1368        shiftCount -= 64;
1369    }
1370    else {
1371        zSig1 = absA;
1372        zSig0 = 0;
1373    }
1374    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1375    return packFloat128( zSign, zExp, zSig0, zSig1 );
1376
1377}
1378
1379#endif
1380#endif /* !SOFTFLOAT_FOR_GCC */
1381
1382#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1383/*
1384-------------------------------------------------------------------------------
1385Returns the result of converting the single-precision floating-point value
1386`a' to the 32-bit two's complement integer format.  The conversion is
1387performed according to the IEC/IEEE Standard for Binary Floating-Point
1388Arithmetic---which means in particular that the conversion is rounded
1389according to the current rounding mode.  If `a' is a NaN, the largest
1390positive integer is returned.  Otherwise, if the conversion overflows, the
1391largest integer with the same sign as `a' is returned.
1392-------------------------------------------------------------------------------
1393*/
1394int32 float32_to_int32( float32 a )
1395{
1396    flag aSign;
1397    int16 aExp, shiftCount;
1398    bits32 aSig;
1399    bits64 aSig64;
1400
1401    aSig = extractFloat32Frac( a );
1402    aExp = extractFloat32Exp( a );
1403    aSign = extractFloat32Sign( a );
1404    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1405    if ( aExp ) aSig |= 0x00800000;
1406    shiftCount = 0xAF - aExp;
1407    aSig64 = aSig;
1408    aSig64 <<= 32;
1409    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1410    return roundAndPackInt32( aSign, aSig64 );
1411
1412}
1413#endif /* !SOFTFLOAT_FOR_GCC */
1414
1415/*
1416-------------------------------------------------------------------------------
1417Returns the result of converting the single-precision floating-point value
1418`a' to the 32-bit two's complement integer format.  The conversion is
1419performed according to the IEC/IEEE Standard for Binary Floating-Point
1420Arithmetic, except that the conversion is always rounded toward zero.
1421If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1422the conversion overflows, the largest integer with the same sign as `a' is
1423returned.
1424-------------------------------------------------------------------------------
1425*/
1426int32 float32_to_int32_round_to_zero( float32 a )
1427{
1428    flag aSign;
1429    int16 aExp, shiftCount;
1430    bits32 aSig;
1431    int32 z;
1432
1433    aSig = extractFloat32Frac( a );
1434    aExp = extractFloat32Exp( a );
1435    aSign = extractFloat32Sign( a );
1436    shiftCount = aExp - 0x9E;
1437    if ( 0 <= shiftCount ) {
1438        if ( a != 0xCF000000 ) {
1439            float_raise( float_flag_invalid );
1440            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1441        }
1442        return (sbits32) 0x80000000;
1443    }
1444    else if ( aExp <= 0x7E ) {
1445        if ( aExp | aSig ) float_set_inexact();
1446        return 0;
1447    }
1448    aSig = ( aSig | 0x00800000 )<<8;
1449    z = aSig>>( - shiftCount );
1450    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1451        float_set_inexact();
1452    }
1453    if ( aSign ) z = - z;
1454    return z;
1455
1456}
1457
1458#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1459/*
1460-------------------------------------------------------------------------------
1461Returns the result of converting the single-precision floating-point value
1462`a' to the 64-bit two's complement integer format.  The conversion is
1463performed according to the IEC/IEEE Standard for Binary Floating-Point
1464Arithmetic---which means in particular that the conversion is rounded
1465according to the current rounding mode.  If `a' is a NaN, the largest
1466positive integer is returned.  Otherwise, if the conversion overflows, the
1467largest integer with the same sign as `a' is returned.
1468-------------------------------------------------------------------------------
1469*/
1470int64 float32_to_int64( float32 a )
1471{
1472    flag aSign;
1473    int16 aExp, shiftCount;
1474    bits32 aSig;
1475    bits64 aSig64, aSigExtra;
1476
1477    aSig = extractFloat32Frac( a );
1478    aExp = extractFloat32Exp( a );
1479    aSign = extractFloat32Sign( a );
1480    shiftCount = 0xBE - aExp;
1481    if ( shiftCount < 0 ) {
1482        float_raise( float_flag_invalid );
1483        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1484            return LIT64( 0x7FFFFFFFFFFFFFFF );
1485        }
1486        return (sbits64) LIT64( 0x8000000000000000 );
1487    }
1488    if ( aExp ) aSig |= 0x00800000;
1489    aSig64 = aSig;
1490    aSig64 <<= 40;
1491    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1492    return roundAndPackInt64( aSign, aSig64, aSigExtra );
1493
1494}
1495
1496/*
1497-------------------------------------------------------------------------------
1498Returns the result of converting the single-precision floating-point value
1499`a' to the 64-bit two's complement integer format.  The conversion is
1500performed according to the IEC/IEEE Standard for Binary Floating-Point
1501Arithmetic, except that the conversion is always rounded toward zero.  If
1502`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1503conversion overflows, the largest integer with the same sign as `a' is
1504returned.
1505-------------------------------------------------------------------------------
1506*/
1507int64 float32_to_int64_round_to_zero( float32 a )
1508{
1509    flag aSign;
1510    int16 aExp, shiftCount;
1511    bits32 aSig;
1512    bits64 aSig64;
1513    int64 z;
1514
1515    aSig = extractFloat32Frac( a );
1516    aExp = extractFloat32Exp( a );
1517    aSign = extractFloat32Sign( a );
1518    shiftCount = aExp - 0xBE;
1519    if ( 0 <= shiftCount ) {
1520        if ( a != 0xDF000000 ) {
1521            float_raise( float_flag_invalid );
1522            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1523                return LIT64( 0x7FFFFFFFFFFFFFFF );
1524            }
1525        }
1526        return (sbits64) LIT64( 0x8000000000000000 );
1527    }
1528    else if ( aExp <= 0x7E ) {
1529        if ( aExp | aSig ) float_set_inexact();
1530        return 0;
1531    }
1532    aSig64 = aSig | 0x00800000;
1533    aSig64 <<= 40;
1534    z = aSig64>>( - shiftCount );
1535    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1536        float_set_inexact();
1537    }
1538    if ( aSign ) z = - z;
1539    return z;
1540
1541}
1542#endif /* !SOFTFLOAT_FOR_GCC */
1543
1544/*
1545-------------------------------------------------------------------------------
1546Returns the result of converting the single-precision floating-point value
1547`a' to the double-precision floating-point format.  The conversion is
1548performed according to the IEC/IEEE Standard for Binary Floating-Point
1549Arithmetic.
1550-------------------------------------------------------------------------------
1551*/
1552float64 float32_to_float64( float32 a )
1553{
1554    flag aSign;
1555    int16 aExp;
1556    bits32 aSig;
1557
1558    aSig = extractFloat32Frac( a );
1559    aExp = extractFloat32Exp( a );
1560    aSign = extractFloat32Sign( a );
1561    if ( aExp == 0xFF ) {
1562        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1563        return packFloat64( aSign, 0x7FF, 0 );
1564    }
1565    if ( aExp == 0 ) {
1566        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1567        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1568        --aExp;
1569    }
1570    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1571
1572}
1573
1574#ifdef FLOATX80
1575
1576/*
1577-------------------------------------------------------------------------------
1578Returns the result of converting the single-precision floating-point value
1579`a' to the extended double-precision floating-point format.  The conversion
1580is performed according to the IEC/IEEE Standard for Binary Floating-Point
1581Arithmetic.
1582-------------------------------------------------------------------------------
1583*/
1584floatx80 float32_to_floatx80( float32 a )
1585{
1586    flag aSign;
1587    int16 aExp;
1588    bits32 aSig;
1589
1590    aSig = extractFloat32Frac( a );
1591    aExp = extractFloat32Exp( a );
1592    aSign = extractFloat32Sign( a );
1593    if ( aExp == 0xFF ) {
1594        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1595        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1596    }
1597    if ( aExp == 0 ) {
1598        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1599        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1600    }
1601    aSig |= 0x00800000;
1602    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1603
1604}
1605
1606#endif
1607
1608#ifdef FLOAT128
1609
1610/*
1611-------------------------------------------------------------------------------
1612Returns the result of converting the single-precision floating-point value
1613`a' to the double-precision floating-point format.  The conversion is
1614performed according to the IEC/IEEE Standard for Binary Floating-Point
1615Arithmetic.
1616-------------------------------------------------------------------------------
1617*/
1618float128 float32_to_float128( float32 a )
1619{
1620    flag aSign;
1621    int16 aExp;
1622    bits32 aSig;
1623
1624    aSig = extractFloat32Frac( a );
1625    aExp = extractFloat32Exp( a );
1626    aSign = extractFloat32Sign( a );
1627    if ( aExp == 0xFF ) {
1628        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1629        return packFloat128( aSign, 0x7FFF, 0, 0 );
1630    }
1631    if ( aExp == 0 ) {
1632        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1633        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1634        --aExp;
1635    }
1636    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1637
1638}
1639
1640#endif
1641
1642#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1643/*
1644-------------------------------------------------------------------------------
1645Rounds the single-precision floating-point value `a' to an integer, and
1646returns the result as a single-precision floating-point value.  The
1647operation is performed according to the IEC/IEEE Standard for Binary
1648Floating-Point Arithmetic.
1649-------------------------------------------------------------------------------
1650*/
1651float32 float32_round_to_int( float32 a )
1652{
1653    flag aSign;
1654    int16 aExp;
1655    bits32 lastBitMask, roundBitsMask;
1656    int8 roundingMode;
1657    float32 z;
1658
1659    aExp = extractFloat32Exp( a );
1660    if ( 0x96 <= aExp ) {
1661        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1662            return propagateFloat32NaN( a, a );
1663        }
1664        return a;
1665    }
1666    if ( aExp <= 0x7E ) {
1667        if ( (bits32) ( a<<1 ) == 0 ) return a;
1668        float_set_inexact();
1669        aSign = extractFloat32Sign( a );
1670        switch ( float_rounding_mode() ) {
1671         case float_round_nearest_even:
1672            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1673                return packFloat32( aSign, 0x7F, 0 );
1674            }
1675            break;
1676         case float_round_down:
1677            return aSign ? 0xBF800000 : 0;
1678         case float_round_up:
1679            return aSign ? 0x80000000 : 0x3F800000;
1680        }
1681        return packFloat32( aSign, 0, 0 );
1682    }
1683    lastBitMask = 1;
1684    lastBitMask <<= 0x96 - aExp;
1685    roundBitsMask = lastBitMask - 1;
1686    z = a;
1687    roundingMode = float_rounding_mode();
1688    if ( roundingMode == float_round_nearest_even ) {
1689        z += lastBitMask>>1;
1690        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1691    }
1692    else if ( roundingMode != float_round_to_zero ) {
1693        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1694            z += roundBitsMask;
1695        }
1696    }
1697    z &= ~ roundBitsMask;
1698    if ( z != a ) float_set_inexact();
1699    return z;
1700
1701}
1702#endif /* !SOFTFLOAT_FOR_GCC */
1703
1704/*
1705-------------------------------------------------------------------------------
1706Returns the result of adding the absolute values of the single-precision
1707floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1708before being returned.  `zSign' is ignored if the result is a NaN.
1709The addition is performed according to the IEC/IEEE Standard for Binary
1710Floating-Point Arithmetic.
1711-------------------------------------------------------------------------------
1712*/
1713static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1714{
1715    int16 aExp, bExp, zExp;
1716    bits32 aSig, bSig, zSig;
1717    int16 expDiff;
1718
1719    aSig = extractFloat32Frac( a );
1720    aExp = extractFloat32Exp( a );
1721    bSig = extractFloat32Frac( b );
1722    bExp = extractFloat32Exp( b );
1723    expDiff = aExp - bExp;
1724    aSig <<= 6;
1725    bSig <<= 6;
1726    if ( 0 < expDiff ) {
1727        if ( aExp == 0xFF ) {
1728            if ( aSig ) return propagateFloat32NaN( a, b );
1729            return a;
1730        }
1731        if ( bExp == 0 ) {
1732            --expDiff;
1733        }
1734        else {
1735            bSig |= 0x20000000;
1736        }
1737        shift32RightJamming( bSig, expDiff, &bSig );
1738        zExp = aExp;
1739    }
1740    else if ( expDiff < 0 ) {
1741        if ( bExp == 0xFF ) {
1742            if ( bSig ) return propagateFloat32NaN( a, b );
1743            return packFloat32( zSign, 0xFF, 0 );
1744        }
1745        if ( aExp == 0 ) {
1746            ++expDiff;
1747        }
1748        else {
1749            aSig |= 0x20000000;
1750        }
1751        shift32RightJamming( aSig, - expDiff, &aSig );
1752        zExp = bExp;
1753    }
1754    else {
1755        if ( aExp == 0xFF ) {
1756            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1757            return a;
1758        }
1759        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1760        zSig = 0x40000000 + aSig + bSig;
1761        zExp = aExp;
1762        goto roundAndPack;
1763    }
1764    aSig |= 0x20000000;
1765    zSig = ( aSig + bSig )<<1;
1766    --zExp;
1767    if ( (sbits32) zSig < 0 ) {
1768        zSig = aSig + bSig;
1769        ++zExp;
1770    }
1771 roundAndPack:
1772    return roundAndPackFloat32( zSign, zExp, zSig );
1773
1774}
1775
1776/*
1777-------------------------------------------------------------------------------
1778Returns the result of subtracting the absolute values of the single-
1779precision floating-point values `a' and `b'.  If `zSign' is 1, the
1780difference is negated before being returned.  `zSign' is ignored if the
1781result is a NaN.  The subtraction is performed according to the IEC/IEEE
1782Standard for Binary Floating-Point Arithmetic.
1783-------------------------------------------------------------------------------
1784*/
1785static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1786{
1787    int16 aExp, bExp, zExp;
1788    bits32 aSig, bSig, zSig;
1789    int16 expDiff;
1790
1791    aSig = extractFloat32Frac( a );
1792    aExp = extractFloat32Exp( a );
1793    bSig = extractFloat32Frac( b );
1794    bExp = extractFloat32Exp( b );
1795    expDiff = aExp - bExp;
1796    aSig <<= 7;
1797    bSig <<= 7;
1798    if ( 0 < expDiff ) goto aExpBigger;
1799    if ( expDiff < 0 ) goto bExpBigger;
1800    if ( aExp == 0xFF ) {
1801        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1802        float_raise( float_flag_invalid );
1803        return float32_default_nan;
1804    }
1805    if ( aExp == 0 ) {
1806        aExp = 1;
1807        bExp = 1;
1808    }
1809    if ( bSig < aSig ) goto aBigger;
1810    if ( aSig < bSig ) goto bBigger;
1811    return packFloat32( float_rounding_mode() == float_round_down, 0, 0 );
1812 bExpBigger:
1813    if ( bExp == 0xFF ) {
1814        if ( bSig ) return propagateFloat32NaN( a, b );
1815        return packFloat32( zSign ^ 1, 0xFF, 0 );
1816    }
1817    if ( aExp == 0 ) {
1818        ++expDiff;
1819    }
1820    else {
1821        aSig |= 0x40000000;
1822    }
1823    shift32RightJamming( aSig, - expDiff, &aSig );
1824    bSig |= 0x40000000;
1825 bBigger:
1826    zSig = bSig - aSig;
1827    zExp = bExp;
1828    zSign ^= 1;
1829    goto normalizeRoundAndPack;
1830 aExpBigger:
1831    if ( aExp == 0xFF ) {
1832        if ( aSig ) return propagateFloat32NaN( a, b );
1833        return a;
1834    }
1835    if ( bExp == 0 ) {
1836        --expDiff;
1837    }
1838    else {
1839        bSig |= 0x40000000;
1840    }
1841    shift32RightJamming( bSig, expDiff, &bSig );
1842    aSig |= 0x40000000;
1843 aBigger:
1844    zSig = aSig - bSig;
1845    zExp = aExp;
1846 normalizeRoundAndPack:
1847    --zExp;
1848    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1849
1850}
1851
1852/*
1853-------------------------------------------------------------------------------
1854Returns the result of adding the single-precision floating-point values `a'
1855and `b'.  The operation is performed according to the IEC/IEEE Standard for
1856Binary Floating-Point Arithmetic.
1857-------------------------------------------------------------------------------
1858*/
1859float32 float32_add( float32 a, float32 b )
1860{
1861    flag aSign, bSign;
1862
1863    aSign = extractFloat32Sign( a );
1864    bSign = extractFloat32Sign( b );
1865    if ( aSign == bSign ) {
1866        return addFloat32Sigs( a, b, aSign );
1867    }
1868    else {
1869        return subFloat32Sigs( a, b, aSign );
1870    }
1871
1872}
1873
1874/*
1875-------------------------------------------------------------------------------
1876Returns the result of subtracting the single-precision floating-point values
1877`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1878for Binary Floating-Point Arithmetic.
1879-------------------------------------------------------------------------------
1880*/
1881float32 float32_sub( float32 a, float32 b )
1882{
1883    flag aSign, bSign;
1884
1885    aSign = extractFloat32Sign( a );
1886    bSign = extractFloat32Sign( b );
1887    if ( aSign == bSign ) {
1888        return subFloat32Sigs( a, b, aSign );
1889    }
1890    else {
1891        return addFloat32Sigs( a, b, aSign );
1892    }
1893
1894}
1895
1896/*
1897-------------------------------------------------------------------------------
1898Returns the result of multiplying the single-precision floating-point values
1899`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1900for Binary Floating-Point Arithmetic.
1901-------------------------------------------------------------------------------
1902*/
1903float32 float32_mul( float32 a, float32 b )
1904{
1905    flag aSign, bSign, zSign;
1906    int16 aExp, bExp, zExp;
1907    bits32 aSig, bSig;
1908    bits64 zSig64;
1909    bits32 zSig;
1910
1911    aSig = extractFloat32Frac( a );
1912    aExp = extractFloat32Exp( a );
1913    aSign = extractFloat32Sign( a );
1914    bSig = extractFloat32Frac( b );
1915    bExp = extractFloat32Exp( b );
1916    bSign = extractFloat32Sign( b );
1917    zSign = aSign ^ bSign;
1918    if ( aExp == 0xFF ) {
1919        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1920            return propagateFloat32NaN( a, b );
1921        }
1922        if ( ( bExp | bSig ) == 0 ) {
1923            float_raise( float_flag_invalid );
1924            return float32_default_nan;
1925        }
1926        return packFloat32( zSign, 0xFF, 0 );
1927    }
1928    if ( bExp == 0xFF ) {
1929        if ( bSig ) return propagateFloat32NaN( a, b );
1930        if ( ( aExp | aSig ) == 0 ) {
1931            float_raise( float_flag_invalid );
1932            return float32_default_nan;
1933        }
1934        return packFloat32( zSign, 0xFF, 0 );
1935    }
1936    if ( aExp == 0 ) {
1937        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1938        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1939    }
1940    if ( bExp == 0 ) {
1941        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1942        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1943    }
1944    zExp = aExp + bExp - 0x7F;
1945    aSig = ( aSig | 0x00800000 )<<7;
1946    bSig = ( bSig | 0x00800000 )<<8;
1947    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1948    zSig = zSig64;
1949    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1950        zSig <<= 1;
1951        --zExp;
1952    }
1953    return roundAndPackFloat32( zSign, zExp, zSig );
1954
1955}
1956
1957/*
1958-------------------------------------------------------------------------------
1959Returns the result of dividing the single-precision floating-point value `a'
1960by the corresponding value `b'.  The operation is performed according to the
1961IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1962-------------------------------------------------------------------------------
1963*/
1964float32 float32_div( float32 a, float32 b )
1965{
1966    flag aSign, bSign, zSign;
1967    int16 aExp, bExp, zExp;
1968    bits32 aSig, bSig, zSig;
1969
1970    aSig = extractFloat32Frac( a );
1971    aExp = extractFloat32Exp( a );
1972    aSign = extractFloat32Sign( a );
1973    bSig = extractFloat32Frac( b );
1974    bExp = extractFloat32Exp( b );
1975    bSign = extractFloat32Sign( b );
1976    zSign = aSign ^ bSign;
1977    if ( aExp == 0xFF ) {
1978        if ( aSig ) return propagateFloat32NaN( a, b );
1979        if ( bExp == 0xFF ) {
1980            if ( bSig ) return propagateFloat32NaN( a, b );
1981            float_raise( float_flag_invalid );
1982            return float32_default_nan;
1983        }
1984        return packFloat32( zSign, 0xFF, 0 );
1985    }
1986    if ( bExp == 0xFF ) {
1987        if ( bSig ) return propagateFloat32NaN( a, b );
1988        return packFloat32( zSign, 0, 0 );
1989    }
1990    if ( bExp == 0 ) {
1991        if ( bSig == 0 ) {
1992            if ( ( aExp | aSig ) == 0 ) {
1993                float_raise( float_flag_invalid );
1994                return float32_default_nan;
1995            }
1996            float_raise( float_flag_divbyzero );
1997            return packFloat32( zSign, 0xFF, 0 );
1998        }
1999        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2000    }
2001    if ( aExp == 0 ) {
2002        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
2003        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2004    }
2005    zExp = aExp - bExp + 0x7D;
2006    aSig = ( aSig | 0x00800000 )<<7;
2007    bSig = ( bSig | 0x00800000 )<<8;
2008    if ( bSig <= ( aSig + aSig ) ) {
2009        aSig >>= 1;
2010        ++zExp;
2011    }
2012    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
2013    if ( ( zSig & 0x3F ) == 0 ) {
2014        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
2015    }
2016    return roundAndPackFloat32( zSign, zExp, zSig );
2017
2018}
2019
2020#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2021/*
2022-------------------------------------------------------------------------------
2023Returns the remainder of the single-precision floating-point value `a'
2024with respect to the corresponding value `b'.  The operation is performed
2025according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2026-------------------------------------------------------------------------------
2027*/
2028float32 float32_rem( float32 a, float32 b )
2029{
2030    flag aSign, bSign, zSign;
2031    int16 aExp, bExp, expDiff;
2032    bits32 aSig, bSig;
2033    bits32 q;
2034    bits64 aSig64, bSig64, q64;
2035    bits32 alternateASig;
2036    sbits32 sigMean;
2037
2038    aSig = extractFloat32Frac( a );
2039    aExp = extractFloat32Exp( a );
2040    aSign = extractFloat32Sign( a );
2041    bSig = extractFloat32Frac( b );
2042    bExp = extractFloat32Exp( b );
2043    bSign = extractFloat32Sign( b );
2044    if ( aExp == 0xFF ) {
2045        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2046            return propagateFloat32NaN( a, b );
2047        }
2048        float_raise( float_flag_invalid );
2049        return float32_default_nan;
2050    }
2051    if ( bExp == 0xFF ) {
2052        if ( bSig ) return propagateFloat32NaN( a, b );
2053        return a;
2054    }
2055    if ( bExp == 0 ) {
2056        if ( bSig == 0 ) {
2057            float_raise( float_flag_invalid );
2058            return float32_default_nan;
2059        }
2060        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2061    }
2062    if ( aExp == 0 ) {
2063        if ( aSig == 0 ) return a;
2064        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2065    }
2066    expDiff = aExp - bExp;
2067    aSig |= 0x00800000;
2068    bSig |= 0x00800000;
2069    if ( expDiff < 32 ) {
2070        aSig <<= 8;
2071        bSig <<= 8;
2072        if ( expDiff < 0 ) {
2073            if ( expDiff < -1 ) return a;
2074            aSig >>= 1;
2075        }
2076        q = ( bSig <= aSig );
2077        if ( q ) aSig -= bSig;
2078        if ( 0 < expDiff ) {
2079            q = ( ( (bits64) aSig )<<32 ) / bSig;
2080            q >>= 32 - expDiff;
2081            bSig >>= 2;
2082            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2083        }
2084        else {
2085            aSig >>= 2;
2086            bSig >>= 2;
2087        }
2088    }
2089    else {
2090        if ( bSig <= aSig ) aSig -= bSig;
2091        aSig64 = ( (bits64) aSig )<<40;
2092        bSig64 = ( (bits64) bSig )<<40;
2093        expDiff -= 64;
2094        while ( 0 < expDiff ) {
2095            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2096            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2097            aSig64 = - ( ( bSig * q64 )<<38 );
2098            expDiff -= 62;
2099        }
2100        expDiff += 64;
2101        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2102        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2103        q = q64>>( 64 - expDiff );
2104        bSig <<= 6;
2105        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2106    }
2107    do {
2108        alternateASig = aSig;
2109        ++q;
2110        aSig -= bSig;
2111    } while ( 0 <= (sbits32) aSig );
2112    sigMean = aSig + alternateASig;
2113    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2114        aSig = alternateASig;
2115    }
2116    zSign = ( (sbits32) aSig < 0 );
2117    if ( zSign ) aSig = - aSig;
2118    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2119
2120}
2121#endif /* !SOFTFLOAT_FOR_GCC */
2122
2123#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2124/*
2125-------------------------------------------------------------------------------
2126Returns the square root of the single-precision floating-point value `a'.
2127The operation is performed according to the IEC/IEEE Standard for Binary
2128Floating-Point Arithmetic.
2129-------------------------------------------------------------------------------
2130*/
2131float32 float32_sqrt( float32 a )
2132{
2133    flag aSign;
2134    int16 aExp, zExp;
2135    bits32 aSig, zSig;
2136    bits64 rem, term;
2137
2138    aSig = extractFloat32Frac( a );
2139    aExp = extractFloat32Exp( a );
2140    aSign = extractFloat32Sign( a );
2141    if ( aExp == 0xFF ) {
2142        if ( aSig ) return propagateFloat32NaN( a, 0 );
2143        if ( ! aSign ) return a;
2144        float_raise( float_flag_invalid );
2145        return float32_default_nan;
2146    }
2147    if ( aSign ) {
2148        if ( ( aExp | aSig ) == 0 ) return a;
2149        float_raise( float_flag_invalid );
2150        return float32_default_nan;
2151    }
2152    if ( aExp == 0 ) {
2153        if ( aSig == 0 ) return 0;
2154        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2155    }
2156    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2157    aSig = ( aSig | 0x00800000 )<<8;
2158    zSig = estimateSqrt32( aExp, aSig ) + 2;
2159    if ( ( zSig & 0x7F ) <= 5 ) {
2160        if ( zSig < 2 ) {
2161            zSig = 0x7FFFFFFF;
2162            goto roundAndPack;
2163        }
2164        aSig >>= aExp & 1;
2165        term = ( (bits64) zSig ) * zSig;
2166        rem = ( ( (bits64) aSig )<<32 ) - term;
2167        while ( (sbits64) rem < 0 ) {
2168            --zSig;
2169            rem += ( ( (bits64) zSig )<<1 ) | 1;
2170        }
2171        zSig |= ( rem != 0 );
2172    }
2173    shift32RightJamming( zSig, 1, &zSig );
2174 roundAndPack:
2175    return roundAndPackFloat32( 0, zExp, zSig );
2176
2177}
2178#endif /* !SOFTFLOAT_FOR_GCC */
2179
2180/*
2181-------------------------------------------------------------------------------
2182Returns 1 if the single-precision floating-point value `a' is equal to
2183the corresponding value `b', and 0 otherwise.  The comparison is performed
2184according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2185-------------------------------------------------------------------------------
2186*/
2187flag float32_eq( float32 a, float32 b )
2188{
2189
2190    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2191         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2192       ) {
2193        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2194            float_raise( float_flag_invalid );
2195        }
2196        return 0;
2197    }
2198    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2199
2200}
2201
2202/*
2203-------------------------------------------------------------------------------
2204Returns 1 if the single-precision floating-point value `a' is less than
2205or equal to the corresponding value `b', and 0 otherwise.  The comparison
2206is performed according to the IEC/IEEE Standard for Binary Floating-Point
2207Arithmetic.
2208-------------------------------------------------------------------------------
2209*/
2210flag float32_le( float32 a, float32 b )
2211{
2212    flag aSign, bSign;
2213
2214    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2215         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2216       ) {
2217        float_raise( float_flag_invalid );
2218        return 0;
2219    }
2220    aSign = extractFloat32Sign( a );
2221    bSign = extractFloat32Sign( b );
2222    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2223    return ( a == b ) || ( aSign ^ ( a < b ) );
2224
2225}
2226
2227/*
2228-------------------------------------------------------------------------------
2229Returns 1 if the single-precision floating-point value `a' is less than
2230the corresponding value `b', and 0 otherwise.  The comparison is performed
2231according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2232-------------------------------------------------------------------------------
2233*/
2234flag float32_lt( float32 a, float32 b )
2235{
2236    flag aSign, bSign;
2237
2238    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2239         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2240       ) {
2241        float_raise( float_flag_invalid );
2242        return 0;
2243    }
2244    aSign = extractFloat32Sign( a );
2245    bSign = extractFloat32Sign( b );
2246    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2247    return ( a != b ) && ( aSign ^ ( a < b ) );
2248
2249}
2250
2251#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2252/*
2253-------------------------------------------------------------------------------
2254Returns 1 if the single-precision floating-point value `a' is equal to
2255the corresponding value `b', and 0 otherwise.  The invalid exception is
2256raised if either operand is a NaN.  Otherwise, the comparison is performed
2257according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2258-------------------------------------------------------------------------------
2259*/
2260flag float32_eq_signaling( float32 a, float32 b )
2261{
2262
2263    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2264         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2265       ) {
2266        float_raise( float_flag_invalid );
2267        return 0;
2268    }
2269    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2270
2271}
2272
2273/*
2274-------------------------------------------------------------------------------
2275Returns 1 if the single-precision floating-point value `a' is less than or
2276equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2277cause an exception.  Otherwise, the comparison is performed according to the
2278IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2279-------------------------------------------------------------------------------
2280*/
2281flag float32_le_quiet( float32 a, float32 b )
2282{
2283    flag aSign, bSign;
2284
2285    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2286         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2287       ) {
2288        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2289            float_raise( float_flag_invalid );
2290        }
2291        return 0;
2292    }
2293    aSign = extractFloat32Sign( a );
2294    bSign = extractFloat32Sign( b );
2295    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2296    return ( a == b ) || ( aSign ^ ( a < b ) );
2297
2298}
2299
2300/*
2301-------------------------------------------------------------------------------
2302Returns 1 if the single-precision floating-point value `a' is less than
2303the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2304exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2305Standard for Binary Floating-Point Arithmetic.
2306-------------------------------------------------------------------------------
2307*/
2308flag float32_lt_quiet( float32 a, float32 b )
2309{
2310    flag aSign, bSign;
2311
2312    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2313         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2314       ) {
2315        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2316            float_raise( float_flag_invalid );
2317        }
2318        return 0;
2319    }
2320    aSign = extractFloat32Sign( a );
2321    bSign = extractFloat32Sign( b );
2322    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2323    return ( a != b ) && ( aSign ^ ( a < b ) );
2324
2325}
2326#endif /* !SOFTFLOAT_FOR_GCC */
2327
2328#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2329/*
2330-------------------------------------------------------------------------------
2331Returns the result of converting the double-precision floating-point value
2332`a' to the 32-bit two's complement integer format.  The conversion is
2333performed according to the IEC/IEEE Standard for Binary Floating-Point
2334Arithmetic---which means in particular that the conversion is rounded
2335according to the current rounding mode.  If `a' is a NaN, the largest
2336positive integer is returned.  Otherwise, if the conversion overflows, the
2337largest integer with the same sign as `a' is returned.
2338-------------------------------------------------------------------------------
2339*/
2340int32 float64_to_int32( float64 a )
2341{
2342    flag aSign;
2343    int16 aExp, shiftCount;
2344    bits64 aSig;
2345
2346    aSig = extractFloat64Frac( a );
2347    aExp = extractFloat64Exp( a );
2348    aSign = extractFloat64Sign( a );
2349    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2350    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2351    shiftCount = 0x42C - aExp;
2352    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2353    return roundAndPackInt32( aSign, aSig );
2354
2355}
2356#endif /* !SOFTFLOAT_FOR_GCC */
2357
2358/*
2359-------------------------------------------------------------------------------
2360Returns the result of converting the double-precision floating-point value
2361`a' to the 32-bit two's complement integer format.  The conversion is
2362performed according to the IEC/IEEE Standard for Binary Floating-Point
2363Arithmetic, except that the conversion is always rounded toward zero.
2364If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2365the conversion overflows, the largest integer with the same sign as `a' is
2366returned.
2367-------------------------------------------------------------------------------
2368*/
2369int32 float64_to_int32_round_to_zero( float64 a )
2370{
2371    flag aSign;
2372    int16 aExp, shiftCount;
2373    bits64 aSig, savedASig;
2374    int32 z;
2375
2376    aSig = extractFloat64Frac( a );
2377    aExp = extractFloat64Exp( a );
2378    aSign = extractFloat64Sign( a );
2379    if ( 0x41E < aExp ) {
2380        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2381        goto invalid;
2382    }
2383    else if ( aExp < 0x3FF ) {
2384        if ( aExp || aSig ) float_set_inexact();
2385        return 0;
2386    }
2387    aSig |= LIT64( 0x0010000000000000 );
2388    shiftCount = 0x433 - aExp;
2389    savedASig = aSig;
2390    aSig >>= shiftCount;
2391    z = aSig;
2392    if ( aSign ) z = - z;
2393    if ( ( z < 0 ) ^ aSign ) {
2394 invalid:
2395        float_raise( float_flag_invalid );
2396        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2397    }
2398    if ( ( aSig<<shiftCount ) != savedASig ) {
2399        float_set_inexact();
2400    }
2401    return z;
2402
2403}
2404
2405#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2406/*
2407-------------------------------------------------------------------------------
2408Returns the result of converting the double-precision floating-point value
2409`a' to the 64-bit two's complement integer format.  The conversion is
2410performed according to the IEC/IEEE Standard for Binary Floating-Point
2411Arithmetic---which means in particular that the conversion is rounded
2412according to the current rounding mode.  If `a' is a NaN, the largest
2413positive integer is returned.  Otherwise, if the conversion overflows, the
2414largest integer with the same sign as `a' is returned.
2415-------------------------------------------------------------------------------
2416*/
2417int64 float64_to_int64( float64 a )
2418{
2419    flag aSign;
2420    int16 aExp, shiftCount;
2421    bits64 aSig, aSigExtra;
2422
2423    aSig = extractFloat64Frac( a );
2424    aExp = extractFloat64Exp( a );
2425    aSign = extractFloat64Sign( a );
2426    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2427    shiftCount = 0x433 - aExp;
2428    if ( shiftCount <= 0 ) {
2429        if ( 0x43E < aExp ) {
2430            float_raise( float_flag_invalid );
2431            if (    ! aSign
2432                 || (    ( aExp == 0x7FF )
2433                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2434               ) {
2435                return LIT64( 0x7FFFFFFFFFFFFFFF );
2436            }
2437            return (sbits64) LIT64( 0x8000000000000000 );
2438        }
2439        aSigExtra = 0;
2440        aSig <<= - shiftCount;
2441    }
2442    else {
2443        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2444    }
2445    return roundAndPackInt64( aSign, aSig, aSigExtra );
2446
2447}
2448
2449#ifdef __alpha__
2450/*
2451-------------------------------------------------------------------------------
2452Returns the result of converting the double-precision floating-point value
2453`a' to the 64-bit two's complement integer format.  The conversion is
2454performed according to the IEC/IEEE Standard for Binary Floating-Point
2455Arithmetic---which means in particular that the conversion is rounded
2456according to the current rounding mode.  If `a' is a NaN, the invalid
2457exception is raised and zero is returned.
2458-------------------------------------------------------------------------------
2459*/
2460int64 float64_to_int64_no_overflow( float64 a )
2461{
2462    flag aSign;
2463    int16 aExp, shiftCount;
2464    bits64 aSig, aSigExtra;
2465
2466    aSig = extractFloat64Frac( a );
2467    aExp = extractFloat64Exp( a );
2468    aSign = extractFloat64Sign( a );
2469    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2470    shiftCount = 0x433 - aExp;
2471    if ( shiftCount <= 0 ) {
2472        if ( 0x43E < aExp ) {
2473            float_raise( float_flag_invalid );
2474            return 0;
2475        }
2476        aSigExtra = 0;
2477        aSig <<= - shiftCount;
2478    }
2479    else {
2480        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2481    }
2482    return roundAndPackInt64NoOverflow( aSign, aSig, aSigExtra );
2483
2484}
2485#endif /* __alpha__ */
2486
2487/*
2488-------------------------------------------------------------------------------
2489Returns the result of converting the double-precision floating-point value
2490`a' to the 64-bit two's complement integer format.  The conversion is
2491performed according to the IEC/IEEE Standard for Binary Floating-Point
2492Arithmetic, except that the conversion is always rounded toward zero.
2493If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2494the conversion overflows, the largest integer with the same sign as `a' is
2495returned.
2496-------------------------------------------------------------------------------
2497*/
2498int64 float64_to_int64_round_to_zero( float64 a )
2499{
2500    flag aSign;
2501    int16 aExp, shiftCount;
2502    bits64 aSig;
2503    int64 z;
2504
2505    aSig = extractFloat64Frac( a );
2506    aExp = extractFloat64Exp( a );
2507    aSign = extractFloat64Sign( a );
2508    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2509    shiftCount = aExp - 0x433;
2510    if ( 0 <= shiftCount ) {
2511        if ( 0x43E <= aExp ) {
2512            if ( a != LIT64( 0xC3E0000000000000 ) ) {
2513                float_raise( float_flag_invalid );
2514                if (    ! aSign
2515                     || (    ( aExp == 0x7FF )
2516                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2517                   ) {
2518                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2519                }
2520            }
2521            return (sbits64) LIT64( 0x8000000000000000 );
2522        }
2523        z = aSig<<shiftCount;
2524    }
2525    else {
2526        if ( aExp < 0x3FE ) {
2527            if ( aExp | aSig ) float_set_inexact();
2528            return 0;
2529        }
2530        z = aSig>>( - shiftCount );
2531        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2532            float_set_inexact();
2533        }
2534    }
2535    if ( aSign ) z = - z;
2536    return z;
2537
2538}
2539#endif /* !SOFTFLOAT_FOR_GCC */
2540
2541/*
2542-------------------------------------------------------------------------------
2543Returns the result of converting the double-precision floating-point value
2544`a' to the single-precision floating-point format.  The conversion is
2545performed according to the IEC/IEEE Standard for Binary Floating-Point
2546Arithmetic.
2547-------------------------------------------------------------------------------
2548*/
2549float32 float64_to_float32( float64 a )
2550{
2551    flag aSign;
2552    int16 aExp;
2553    bits64 aSig;
2554    bits32 zSig;
2555
2556    aSig = extractFloat64Frac( a );
2557    aExp = extractFloat64Exp( a );
2558    aSign = extractFloat64Sign( a );
2559    if ( aExp == 0x7FF ) {
2560        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2561        return packFloat32( aSign, 0xFF, 0 );
2562    }
2563    shift64RightJamming( aSig, 22, &aSig );
2564    zSig = aSig;
2565    if ( aExp || zSig ) {
2566        zSig |= 0x40000000;
2567        aExp -= 0x381;
2568    }
2569    return roundAndPackFloat32( aSign, aExp, zSig );
2570
2571}
2572
2573#ifdef FLOATX80
2574
2575/*
2576-------------------------------------------------------------------------------
2577Returns the result of converting the double-precision floating-point value
2578`a' to the extended double-precision floating-point format.  The conversion
2579is performed according to the IEC/IEEE Standard for Binary Floating-Point
2580Arithmetic.
2581-------------------------------------------------------------------------------
2582*/
2583floatx80 float64_to_floatx80( float64 a )
2584{
2585    flag aSign;
2586    int16 aExp;
2587    bits64 aSig;
2588
2589    aSig = extractFloat64Frac( a );
2590    aExp = extractFloat64Exp( a );
2591    aSign = extractFloat64Sign( a );
2592    if ( aExp == 0x7FF ) {
2593        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2594        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2595    }
2596    if ( aExp == 0 ) {
2597        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2598        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2599    }
2600    return
2601        packFloatx80(
2602            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2603
2604}
2605
2606#endif
2607
2608#ifdef FLOAT128
2609
2610/*
2611-------------------------------------------------------------------------------
2612Returns the result of converting the double-precision floating-point value
2613`a' to the quadruple-precision floating-point format.  The conversion is
2614performed according to the IEC/IEEE Standard for Binary Floating-Point
2615Arithmetic.
2616-------------------------------------------------------------------------------
2617*/
2618float128 float64_to_float128( float64 a )
2619{
2620    flag aSign;
2621    int16 aExp;
2622    bits64 aSig, zSig0, zSig1;
2623
2624    aSig = extractFloat64Frac( a );
2625    aExp = extractFloat64Exp( a );
2626    aSign = extractFloat64Sign( a );
2627    if ( aExp == 0x7FF ) {
2628        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
2629        return packFloat128( aSign, 0x7FFF, 0, 0 );
2630    }
2631    if ( aExp == 0 ) {
2632        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2633        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2634        --aExp;
2635    }
2636    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2637    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2638
2639}
2640
2641#endif
2642
2643#ifndef SOFTFLOAT_FOR_GCC
2644/*
2645-------------------------------------------------------------------------------
2646Rounds the double-precision floating-point value `a' to an integer, and
2647returns the result as a double-precision floating-point value.  The
2648operation is performed according to the IEC/IEEE Standard for Binary
2649Floating-Point Arithmetic.
2650-------------------------------------------------------------------------------
2651*/
2652float64 float64_round_to_int( float64 a )
2653{
2654    flag aSign;
2655    int16 aExp;
2656    bits64 lastBitMask, roundBitsMask;
2657    int8 roundingMode;
2658    float64 z;
2659
2660    aExp = extractFloat64Exp( a );
2661    if ( 0x433 <= aExp ) {
2662        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2663            return propagateFloat64NaN( a, a );
2664        }
2665        return a;
2666    }
2667    if ( aExp < 0x3FF ) {
2668        if ( (bits64) ( a<<1 ) == 0 ) return a;
2669        float_set_inexact();
2670        aSign = extractFloat64Sign( a );
2671        switch ( float_rounding_mode() ) {
2672         case float_round_nearest_even:
2673            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2674                return packFloat64( aSign, 0x3FF, 0 );
2675            }
2676            break;
2677         case float_round_down:
2678            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2679         case float_round_up:
2680            return
2681            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2682        }
2683        return packFloat64( aSign, 0, 0 );
2684    }
2685    lastBitMask = 1;
2686    lastBitMask <<= 0x433 - aExp;
2687    roundBitsMask = lastBitMask - 1;
2688    z = a;
2689    roundingMode = float_rounding_mode();
2690    if ( roundingMode == float_round_nearest_even ) {
2691        z += lastBitMask>>1;
2692        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2693    }
2694    else if ( roundingMode != float_round_to_zero ) {
2695        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2696            z += roundBitsMask;
2697        }
2698    }
2699    z &= ~ roundBitsMask;
2700    if ( z != a ) float_set_inexact();
2701    return z;
2702
2703}
2704#endif
2705
2706/*
2707-------------------------------------------------------------------------------
2708Returns the result of adding the absolute values of the double-precision
2709floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2710before being returned.  `zSign' is ignored if the result is a NaN.
2711The addition is performed according to the IEC/IEEE Standard for Binary
2712Floating-Point Arithmetic.
2713-------------------------------------------------------------------------------
2714*/
2715static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2716{
2717    int16 aExp, bExp, zExp;
2718    bits64 aSig, bSig, zSig;
2719    int16 expDiff;
2720
2721    aSig = extractFloat64Frac( a );
2722    aExp = extractFloat64Exp( a );
2723    bSig = extractFloat64Frac( b );
2724    bExp = extractFloat64Exp( b );
2725    expDiff = aExp - bExp;
2726    aSig <<= 9;
2727    bSig <<= 9;
2728    if ( 0 < expDiff ) {
2729        if ( aExp == 0x7FF ) {
2730            if ( aSig ) return propagateFloat64NaN( a, b );
2731            return a;
2732        }
2733        if ( bExp == 0 ) {
2734            --expDiff;
2735        }
2736        else {
2737            bSig |= LIT64( 0x2000000000000000 );
2738        }
2739        shift64RightJamming( bSig, expDiff, &bSig );
2740        zExp = aExp;
2741    }
2742    else if ( expDiff < 0 ) {
2743        if ( bExp == 0x7FF ) {
2744            if ( bSig ) return propagateFloat64NaN( a, b );
2745            return packFloat64( zSign, 0x7FF, 0 );
2746        }
2747        if ( aExp == 0 ) {
2748            ++expDiff;
2749        }
2750        else {
2751            aSig |= LIT64( 0x2000000000000000 );
2752        }
2753        shift64RightJamming( aSig, - expDiff, &aSig );
2754        zExp = bExp;
2755    }
2756    else {
2757        if ( aExp == 0x7FF ) {
2758            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2759            return a;
2760        }
2761        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2762        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2763        zExp = aExp;
2764        goto roundAndPack;
2765    }
2766    aSig |= LIT64( 0x2000000000000000 );
2767    zSig = ( aSig + bSig )<<1;
2768    --zExp;
2769    if ( (sbits64) zSig < 0 ) {
2770        zSig = aSig + bSig;
2771        ++zExp;
2772    }
2773 roundAndPack:
2774    return roundAndPackFloat64( zSign, zExp, zSig );
2775
2776}
2777
2778/*
2779-------------------------------------------------------------------------------
2780Returns the result of subtracting the absolute values of the double-
2781precision floating-point values `a' and `b'.  If `zSign' is 1, the
2782difference is negated before being returned.  `zSign' is ignored if the
2783result is a NaN.  The subtraction is performed according to the IEC/IEEE
2784Standard for Binary Floating-Point Arithmetic.
2785-------------------------------------------------------------------------------
2786*/
2787static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2788{
2789    int16 aExp, bExp, zExp;
2790    bits64 aSig, bSig, zSig;
2791    int16 expDiff;
2792
2793    aSig = extractFloat64Frac( a );
2794    aExp = extractFloat64Exp( a );
2795    bSig = extractFloat64Frac( b );
2796    bExp = extractFloat64Exp( b );
2797    expDiff = aExp - bExp;
2798    aSig <<= 10;
2799    bSig <<= 10;
2800    if ( 0 < expDiff ) goto aExpBigger;
2801    if ( expDiff < 0 ) goto bExpBigger;
2802    if ( aExp == 0x7FF ) {
2803        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2804        float_raise( float_flag_invalid );
2805        return float64_default_nan;
2806    }
2807    if ( aExp == 0 ) {
2808        aExp = 1;
2809        bExp = 1;
2810    }
2811    if ( bSig < aSig ) goto aBigger;
2812    if ( aSig < bSig ) goto bBigger;
2813    return packFloat64( float_rounding_mode() == float_round_down, 0, 0 );
2814 bExpBigger:
2815    if ( bExp == 0x7FF ) {
2816        if ( bSig ) return propagateFloat64NaN( a, b );
2817        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2818    }
2819    if ( aExp == 0 ) {
2820        ++expDiff;
2821    }
2822    else {
2823        aSig |= LIT64( 0x4000000000000000 );
2824    }
2825    shift64RightJamming( aSig, - expDiff, &aSig );
2826    bSig |= LIT64( 0x4000000000000000 );
2827 bBigger:
2828    zSig = bSig - aSig;
2829    zExp = bExp;
2830    zSign ^= 1;
2831    goto normalizeRoundAndPack;
2832 aExpBigger:
2833    if ( aExp == 0x7FF ) {
2834        if ( aSig ) return propagateFloat64NaN( a, b );
2835        return a;
2836    }
2837    if ( bExp == 0 ) {
2838        --expDiff;
2839    }
2840    else {
2841        bSig |= LIT64( 0x4000000000000000 );
2842    }
2843    shift64RightJamming( bSig, expDiff, &bSig );
2844    aSig |= LIT64( 0x4000000000000000 );
2845 aBigger:
2846    zSig = aSig - bSig;
2847    zExp = aExp;
2848 normalizeRoundAndPack:
2849    --zExp;
2850    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2851
2852}
2853
2854/*
2855-------------------------------------------------------------------------------
2856Returns the result of adding the double-precision floating-point values `a'
2857and `b'.  The operation is performed according to the IEC/IEEE Standard for
2858Binary Floating-Point Arithmetic.
2859-------------------------------------------------------------------------------
2860*/
2861float64 float64_add( float64 a, float64 b )
2862{
2863    flag aSign, bSign;
2864
2865    aSign = extractFloat64Sign( a );
2866    bSign = extractFloat64Sign( b );
2867    if ( aSign == bSign ) {
2868        return addFloat64Sigs( a, b, aSign );
2869    }
2870    else {
2871        return subFloat64Sigs( a, b, aSign );
2872    }
2873
2874}
2875
2876/*
2877-------------------------------------------------------------------------------
2878Returns the result of subtracting the double-precision floating-point values
2879`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2880for Binary Floating-Point Arithmetic.
2881-------------------------------------------------------------------------------
2882*/
2883float64 float64_sub( float64 a, float64 b )
2884{
2885    flag aSign, bSign;
2886
2887    aSign = extractFloat64Sign( a );
2888    bSign = extractFloat64Sign( b );
2889    if ( aSign == bSign ) {
2890        return subFloat64Sigs( a, b, aSign );
2891    }
2892    else {
2893        return addFloat64Sigs( a, b, aSign );
2894    }
2895
2896}
2897
2898/*
2899-------------------------------------------------------------------------------
2900Returns the result of multiplying the double-precision floating-point values
2901`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2902for Binary Floating-Point Arithmetic.
2903-------------------------------------------------------------------------------
2904*/
2905float64 float64_mul( float64 a, float64 b )
2906{
2907    flag aSign, bSign, zSign;
2908    int16 aExp, bExp, zExp;
2909    bits64 aSig, bSig, zSig0, zSig1;
2910
2911    aSig = extractFloat64Frac( a );
2912    aExp = extractFloat64Exp( a );
2913    aSign = extractFloat64Sign( a );
2914    bSig = extractFloat64Frac( b );
2915    bExp = extractFloat64Exp( b );
2916    bSign = extractFloat64Sign( b );
2917    zSign = aSign ^ bSign;
2918    if ( aExp == 0x7FF ) {
2919        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2920            return propagateFloat64NaN( a, b );
2921        }
2922        if ( ( bExp | bSig ) == 0 ) {
2923            float_raise( float_flag_invalid );
2924            return float64_default_nan;
2925        }
2926        return packFloat64( zSign, 0x7FF, 0 );
2927    }
2928    if ( bExp == 0x7FF ) {
2929        if ( bSig ) return propagateFloat64NaN( a, b );
2930        if ( ( aExp | aSig ) == 0 ) {
2931            float_raise( float_flag_invalid );
2932            return float64_default_nan;
2933        }
2934        return packFloat64( zSign, 0x7FF, 0 );
2935    }
2936    if ( aExp == 0 ) {
2937        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2938        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2939    }
2940    if ( bExp == 0 ) {
2941        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2942        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2943    }
2944    zExp = aExp + bExp - 0x3FF;
2945    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2946    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2947    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2948    zSig0 |= ( zSig1 != 0 );
2949    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2950        zSig0 <<= 1;
2951        --zExp;
2952    }
2953    return roundAndPackFloat64( zSign, zExp, zSig0 );
2954
2955}
2956
2957/*
2958-------------------------------------------------------------------------------
2959Returns the result of dividing the double-precision floating-point value `a'
2960by the corresponding value `b'.  The operation is performed according to
2961the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2962-------------------------------------------------------------------------------
2963*/
2964float64 float64_div( float64 a, float64 b )
2965{
2966    flag aSign, bSign, zSign;
2967    int16 aExp, bExp, zExp;
2968    bits64 aSig, bSig, zSig;
2969    bits64 rem0, rem1;
2970    bits64 term0, term1;
2971
2972    aSig = extractFloat64Frac( a );
2973    aExp = extractFloat64Exp( a );
2974    aSign = extractFloat64Sign( a );
2975    bSig = extractFloat64Frac( b );
2976    bExp = extractFloat64Exp( b );
2977    bSign = extractFloat64Sign( b );
2978    zSign = aSign ^ bSign;
2979    if ( aExp == 0x7FF ) {
2980        if ( aSig ) return propagateFloat64NaN( a, b );
2981        if ( bExp == 0x7FF ) {
2982            if ( bSig ) return propagateFloat64NaN( a, b );
2983            float_raise( float_flag_invalid );
2984            return float64_default_nan;
2985        }
2986        return packFloat64( zSign, 0x7FF, 0 );
2987    }
2988    if ( bExp == 0x7FF ) {
2989        if ( bSig ) return propagateFloat64NaN( a, b );
2990        return packFloat64( zSign, 0, 0 );
2991    }
2992    if ( bExp == 0 ) {
2993        if ( bSig == 0 ) {
2994            if ( ( aExp | aSig ) == 0 ) {
2995                float_raise( float_flag_invalid );
2996                return float64_default_nan;
2997            }
2998            float_raise( float_flag_divbyzero );
2999            return packFloat64( zSign, 0x7FF, 0 );
3000        }
3001        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3002    }
3003    if ( aExp == 0 ) {
3004        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
3005        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3006    }
3007    zExp = aExp - bExp + 0x3FD;
3008    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
3009    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3010    if ( bSig <= ( aSig + aSig ) ) {
3011        aSig >>= 1;
3012        ++zExp;
3013    }
3014    zSig = estimateDiv128To64( aSig, 0, bSig );
3015    if ( ( zSig & 0x1FF ) <= 2 ) {
3016        mul64To128( bSig, zSig, &term0, &term1 );
3017        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3018        while ( (sbits64) rem0 < 0 ) {
3019            --zSig;
3020            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3021        }
3022        zSig |= ( rem1 != 0 );
3023    }
3024    return roundAndPackFloat64( zSign, zExp, zSig );
3025
3026}
3027
3028#ifndef SOFTFLOAT_FOR_GCC
3029/*
3030-------------------------------------------------------------------------------
3031Returns the remainder of the double-precision floating-point value `a'
3032with respect to the corresponding value `b'.  The operation is performed
3033according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3034-------------------------------------------------------------------------------
3035*/
3036float64 float64_rem( float64 a, float64 b )
3037{
3038    flag aSign, bSign, zSign;
3039    int16 aExp, bExp, expDiff;
3040    bits64 aSig, bSig;
3041    bits64 q, alternateASig;
3042    sbits64 sigMean;
3043
3044    aSig = extractFloat64Frac( a );
3045    aExp = extractFloat64Exp( a );
3046    aSign = extractFloat64Sign( a );
3047    bSig = extractFloat64Frac( b );
3048    bExp = extractFloat64Exp( b );
3049    bSign = extractFloat64Sign( b );
3050    if ( aExp == 0x7FF ) {
3051        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3052            return propagateFloat64NaN( a, b );
3053        }
3054        float_raise( float_flag_invalid );
3055        return float64_default_nan;
3056    }
3057    if ( bExp == 0x7FF ) {
3058        if ( bSig ) return propagateFloat64NaN( a, b );
3059        return a;
3060    }
3061    if ( bExp == 0 ) {
3062        if ( bSig == 0 ) {
3063            float_raise( float_flag_invalid );
3064            return float64_default_nan;
3065        }
3066        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3067    }
3068    if ( aExp == 0 ) {
3069        if ( aSig == 0 ) return a;
3070        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3071    }
3072    expDiff = aExp - bExp;
3073    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3074    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3075    if ( expDiff < 0 ) {
3076        if ( expDiff < -1 ) return a;
3077        aSig >>= 1;
3078    }
3079    q = ( bSig <= aSig );
3080    if ( q ) aSig -= bSig;
3081    expDiff -= 64;
3082    while ( 0 < expDiff ) {
3083        q = estimateDiv128To64( aSig, 0, bSig );
3084        q = ( 2 < q ) ? q - 2 : 0;
3085        aSig = - ( ( bSig>>2 ) * q );
3086        expDiff -= 62;
3087    }
3088    expDiff += 64;
3089    if ( 0 < expDiff ) {
3090        q = estimateDiv128To64( aSig, 0, bSig );
3091        q = ( 2 < q ) ? q - 2 : 0;
3092        q >>= 64 - expDiff;
3093        bSig >>= 2;
3094        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3095    }
3096    else {
3097        aSig >>= 2;
3098        bSig >>= 2;
3099    }
3100    do {
3101        alternateASig = aSig;
3102        ++q;
3103        aSig -= bSig;
3104    } while ( 0 <= (sbits64) aSig );
3105    sigMean = aSig + alternateASig;
3106    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3107        aSig = alternateASig;
3108    }
3109    zSign = ( (sbits64) aSig < 0 );
3110    if ( zSign ) aSig = - aSig;
3111    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3112
3113}
3114
3115/*
3116-------------------------------------------------------------------------------
3117Returns the square root of the double-precision floating-point value `a'.
3118The operation is performed according to the IEC/IEEE Standard for Binary
3119Floating-Point Arithmetic.
3120-------------------------------------------------------------------------------
3121*/
3122float64 float64_sqrt( float64 a )
3123{
3124    flag aSign;
3125    int16 aExp, zExp;
3126    bits64 aSig, zSig, doubleZSig;
3127    bits64 rem0, rem1, term0, term1;
3128
3129    aSig = extractFloat64Frac( a );
3130    aExp = extractFloat64Exp( a );
3131    aSign = extractFloat64Sign( a );
3132    if ( aExp == 0x7FF ) {
3133        if ( aSig ) return propagateFloat64NaN( a, a );
3134        if ( ! aSign ) return a;
3135        float_raise( float_flag_invalid );
3136        return float64_default_nan;
3137    }
3138    if ( aSign ) {
3139        if ( ( aExp | aSig ) == 0 ) return a;
3140        float_raise( float_flag_invalid );
3141        return float64_default_nan;
3142    }
3143    if ( aExp == 0 ) {
3144        if ( aSig == 0 ) return 0;
3145        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3146    }
3147    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3148    aSig |= LIT64( 0x0010000000000000 );
3149    zSig = estimateSqrt32( aExp, aSig>>21 );
3150    aSig <<= 9 - ( aExp & 1 );
3151    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3152    if ( ( zSig & 0x1FF ) <= 5 ) {
3153        doubleZSig = zSig<<1;
3154        mul64To128( zSig, zSig, &term0, &term1 );
3155        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3156        while ( (sbits64) rem0 < 0 ) {
3157            --zSig;
3158            doubleZSig -= 2;
3159            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3160        }
3161        zSig |= ( ( rem0 | rem1 ) != 0 );
3162    }
3163    return roundAndPackFloat64( 0, zExp, zSig );
3164
3165}
3166#endif
3167
3168/*
3169-------------------------------------------------------------------------------
3170Returns 1 if the double-precision floating-point value `a' is equal to the
3171corresponding value `b', and 0 otherwise.  The comparison is performed
3172according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3173-------------------------------------------------------------------------------
3174*/
3175flag float64_eq( float64 a, float64 b )
3176{
3177
3178    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3179         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3180       ) {
3181        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3182            float_raise( float_flag_invalid );
3183        }
3184        return 0;
3185    }
3186    return ( a == b ) ||
3187	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3188
3189}
3190
3191/*
3192-------------------------------------------------------------------------------
3193Returns 1 if the double-precision floating-point value `a' is less than or
3194equal to the corresponding value `b', and 0 otherwise.  The comparison is
3195performed according to the IEC/IEEE Standard for Binary Floating-Point
3196Arithmetic.
3197-------------------------------------------------------------------------------
3198*/
3199flag float64_le( float64 a, float64 b )
3200{
3201    flag aSign, bSign;
3202
3203    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3204         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3205       ) {
3206        float_raise( float_flag_invalid );
3207        return 0;
3208    }
3209    aSign = extractFloat64Sign( a );
3210    bSign = extractFloat64Sign( b );
3211    if ( aSign != bSign )
3212	return aSign ||
3213	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3214	      0 );
3215    return ( a == b ) ||
3216	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3217
3218}
3219
3220/*
3221-------------------------------------------------------------------------------
3222Returns 1 if the double-precision floating-point value `a' is less than
3223the corresponding value `b', and 0 otherwise.  The comparison is performed
3224according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3225-------------------------------------------------------------------------------
3226*/
3227flag float64_lt( float64 a, float64 b )
3228{
3229    flag aSign, bSign;
3230
3231    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3232         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3233       ) {
3234        float_raise( float_flag_invalid );
3235        return 0;
3236    }
3237    aSign = extractFloat64Sign( a );
3238    bSign = extractFloat64Sign( b );
3239    if ( aSign != bSign )
3240	return aSign &&
3241	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3242	      0 );
3243    return ( a != b ) &&
3244	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3245
3246}
3247
3248#ifndef SOFTFLOAT_FOR_GCC
3249/*
3250-------------------------------------------------------------------------------
3251Returns 1 if the double-precision floating-point value `a' is equal to the
3252corresponding value `b', and 0 otherwise.  The invalid exception is raised
3253if either operand is a NaN.  Otherwise, the comparison is performed
3254according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3255-------------------------------------------------------------------------------
3256*/
3257flag float64_eq_signaling( float64 a, float64 b )
3258{
3259
3260    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3261         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3262       ) {
3263        float_raise( float_flag_invalid );
3264        return 0;
3265    }
3266    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3267
3268}
3269
3270/*
3271-------------------------------------------------------------------------------
3272Returns 1 if the double-precision floating-point value `a' is less than or
3273equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3274cause an exception.  Otherwise, the comparison is performed according to the
3275IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3276-------------------------------------------------------------------------------
3277*/
3278flag float64_le_quiet( float64 a, float64 b )
3279{
3280    flag aSign, bSign;
3281
3282    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3283         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3284       ) {
3285        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3286            float_raise( float_flag_invalid );
3287        }
3288        return 0;
3289    }
3290    aSign = extractFloat64Sign( a );
3291    bSign = extractFloat64Sign( b );
3292    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3293    return ( a == b ) || ( aSign ^ ( a < b ) );
3294
3295}
3296
3297/*
3298-------------------------------------------------------------------------------
3299Returns 1 if the double-precision floating-point value `a' is less than
3300the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3301exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3302Standard for Binary Floating-Point Arithmetic.
3303-------------------------------------------------------------------------------
3304*/
3305flag float64_lt_quiet( float64 a, float64 b )
3306{
3307    flag aSign, bSign;
3308
3309    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3310         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3311       ) {
3312        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3313            float_raise( float_flag_invalid );
3314        }
3315        return 0;
3316    }
3317    aSign = extractFloat64Sign( a );
3318    bSign = extractFloat64Sign( b );
3319    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3320    return ( a != b ) && ( aSign ^ ( a < b ) );
3321
3322}
3323#endif
3324
3325#ifdef FLOATX80
3326
3327/*
3328-------------------------------------------------------------------------------
3329Returns the result of converting the extended double-precision floating-
3330point value `a' to the 32-bit two's complement integer format.  The
3331conversion is performed according to the IEC/IEEE Standard for Binary
3332Floating-Point Arithmetic---which means in particular that the conversion
3333is rounded according to the current rounding mode.  If `a' is a NaN, the
3334largest positive integer is returned.  Otherwise, if the conversion
3335overflows, the largest integer with the same sign as `a' is returned.
3336-------------------------------------------------------------------------------
3337*/
3338int32 floatx80_to_int32( floatx80 a )
3339{
3340    flag aSign;
3341    int32 aExp, shiftCount;
3342    bits64 aSig;
3343
3344    aSig = extractFloatx80Frac( a );
3345    aExp = extractFloatx80Exp( a );
3346    aSign = extractFloatx80Sign( a );
3347    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3348    shiftCount = 0x4037 - aExp;
3349    if ( shiftCount <= 0 ) shiftCount = 1;
3350    shift64RightJamming( aSig, shiftCount, &aSig );
3351    return roundAndPackInt32( aSign, aSig );
3352
3353}
3354
3355/*
3356-------------------------------------------------------------------------------
3357Returns the result of converting the extended double-precision floating-
3358point value `a' to the 32-bit two's complement integer format.  The
3359conversion is performed according to the IEC/IEEE Standard for Binary
3360Floating-Point Arithmetic, except that the conversion is always rounded
3361toward zero.  If `a' is a NaN, the largest positive integer is returned.
3362Otherwise, if the conversion overflows, the largest integer with the same
3363sign as `a' is returned.
3364-------------------------------------------------------------------------------
3365*/
3366int32 floatx80_to_int32_round_to_zero( floatx80 a )
3367{
3368    flag aSign;
3369    int32 aExp, shiftCount;
3370    bits64 aSig, savedASig;
3371    int32 z;
3372
3373    aSig = extractFloatx80Frac( a );
3374    aExp = extractFloatx80Exp( a );
3375    aSign = extractFloatx80Sign( a );
3376    if ( 0x401E < aExp ) {
3377        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3378        goto invalid;
3379    }
3380    else if ( aExp < 0x3FFF ) {
3381        if ( aExp || aSig ) float_set_inexact();
3382        return 0;
3383    }
3384    shiftCount = 0x403E - aExp;
3385    savedASig = aSig;
3386    aSig >>= shiftCount;
3387    z = aSig;
3388    if ( aSign ) z = - z;
3389    if ( ( z < 0 ) ^ aSign ) {
3390 invalid:
3391        float_raise( float_flag_invalid );
3392        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3393    }
3394    if ( ( aSig<<shiftCount ) != savedASig ) {
3395        float_set_inexact();
3396    }
3397    return z;
3398
3399}
3400
3401/*
3402-------------------------------------------------------------------------------
3403Returns the result of converting the extended double-precision floating-
3404point value `a' to the 64-bit two's complement integer format.  The
3405conversion is performed according to the IEC/IEEE Standard for Binary
3406Floating-Point Arithmetic---which means in particular that the conversion
3407is rounded according to the current rounding mode.  If `a' is a NaN,
3408the largest positive integer is returned.  Otherwise, if the conversion
3409overflows, the largest integer with the same sign as `a' is returned.
3410-------------------------------------------------------------------------------
3411*/
3412int64 floatx80_to_int64( floatx80 a )
3413{
3414    flag aSign;
3415    int32 aExp, shiftCount;
3416    bits64 aSig, aSigExtra;
3417
3418    aSig = extractFloatx80Frac( a );
3419    aExp = extractFloatx80Exp( a );
3420    aSign = extractFloatx80Sign( a );
3421    shiftCount = 0x403E - aExp;
3422    if ( shiftCount <= 0 ) {
3423        if ( shiftCount ) {
3424            float_raise( float_flag_invalid );
3425            if (    ! aSign
3426                 || (    ( aExp == 0x7FFF )
3427                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3428               ) {
3429                return LIT64( 0x7FFFFFFFFFFFFFFF );
3430            }
3431            return (sbits64) LIT64( 0x8000000000000000 );
3432        }
3433        aSigExtra = 0;
3434    }
3435    else {
3436        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3437    }
3438    return roundAndPackInt64( aSign, aSig, aSigExtra );
3439
3440}
3441
3442/*
3443-------------------------------------------------------------------------------
3444Returns the result of converting the extended double-precision floating-
3445point value `a' to the 64-bit two's complement integer format.  The
3446conversion is performed according to the IEC/IEEE Standard for Binary
3447Floating-Point Arithmetic, except that the conversion is always rounded
3448toward zero.  If `a' is a NaN, the largest positive integer is returned.
3449Otherwise, if the conversion overflows, the largest integer with the same
3450sign as `a' is returned.
3451-------------------------------------------------------------------------------
3452*/
3453int64 floatx80_to_int64_round_to_zero( floatx80 a )
3454{
3455    flag aSign;
3456    int32 aExp, shiftCount;
3457    bits64 aSig;
3458    int64 z;
3459
3460    aSig = extractFloatx80Frac( a );
3461    aExp = extractFloatx80Exp( a );
3462    aSign = extractFloatx80Sign( a );
3463    shiftCount = aExp - 0x403E;
3464    if ( 0 <= shiftCount ) {
3465        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3466        if ( ( a.high != 0xC03E ) || aSig ) {
3467            float_raise( float_flag_invalid );
3468            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3469                return LIT64( 0x7FFFFFFFFFFFFFFF );
3470            }
3471        }
3472        return (sbits64) LIT64( 0x8000000000000000 );
3473    }
3474    else if ( aExp < 0x3FFF ) {
3475        if ( aExp | aSig ) float_set_inexact();
3476        return 0;
3477    }
3478    z = aSig>>( - shiftCount );
3479    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3480        float_set_inexact();
3481    }
3482    if ( aSign ) z = - z;
3483    return z;
3484
3485}
3486
3487/*
3488-------------------------------------------------------------------------------
3489Returns the result of converting the extended double-precision floating-
3490point value `a' to the single-precision floating-point format.  The
3491conversion is performed according to the IEC/IEEE Standard for Binary
3492Floating-Point Arithmetic.
3493-------------------------------------------------------------------------------
3494*/
3495float32 floatx80_to_float32( floatx80 a )
3496{
3497    flag aSign;
3498    int32 aExp;
3499    bits64 aSig;
3500
3501    aSig = extractFloatx80Frac( a );
3502    aExp = extractFloatx80Exp( a );
3503    aSign = extractFloatx80Sign( a );
3504    if ( aExp == 0x7FFF ) {
3505        if ( (bits64) ( aSig<<1 ) ) {
3506            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3507        }
3508        return packFloat32( aSign, 0xFF, 0 );
3509    }
3510    shift64RightJamming( aSig, 33, &aSig );
3511    if ( aExp || aSig ) aExp -= 0x3F81;
3512    return roundAndPackFloat32( aSign, aExp, aSig );
3513
3514}
3515
3516/*
3517-------------------------------------------------------------------------------
3518Returns the result of converting the extended double-precision floating-
3519point value `a' to the double-precision floating-point format.  The
3520conversion is performed according to the IEC/IEEE Standard for Binary
3521Floating-Point Arithmetic.
3522-------------------------------------------------------------------------------
3523*/
3524float64 floatx80_to_float64( floatx80 a )
3525{
3526    flag aSign;
3527    int32 aExp;
3528    bits64 aSig, zSig;
3529
3530    aSig = extractFloatx80Frac( a );
3531    aExp = extractFloatx80Exp( a );
3532    aSign = extractFloatx80Sign( a );
3533    if ( aExp == 0x7FFF ) {
3534        if ( (bits64) ( aSig<<1 ) ) {
3535            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3536        }
3537        return packFloat64( aSign, 0x7FF, 0 );
3538    }
3539    shift64RightJamming( aSig, 1, &zSig );
3540    if ( aExp || aSig ) aExp -= 0x3C01;
3541    return roundAndPackFloat64( aSign, aExp, zSig );
3542
3543}
3544
3545#ifdef FLOAT128
3546
3547/*
3548-------------------------------------------------------------------------------
3549Returns the result of converting the extended double-precision floating-
3550point value `a' to the quadruple-precision floating-point format.  The
3551conversion is performed according to the IEC/IEEE Standard for Binary
3552Floating-Point Arithmetic.
3553-------------------------------------------------------------------------------
3554*/
3555float128 floatx80_to_float128( floatx80 a )
3556{
3557    flag aSign;
3558    int16 aExp;
3559    bits64 aSig, zSig0, zSig1;
3560
3561    aSig = extractFloatx80Frac( a );
3562    aExp = extractFloatx80Exp( a );
3563    aSign = extractFloatx80Sign( a );
3564    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3565        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3566    }
3567    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3568    return packFloat128( aSign, aExp, zSig0, zSig1 );
3569
3570}
3571
3572#endif
3573
3574/*
3575-------------------------------------------------------------------------------
3576Rounds the extended double-precision floating-point value `a' to an integer,
3577and returns the result as an extended quadruple-precision floating-point
3578value.  The operation is performed according to the IEC/IEEE Standard for
3579Binary Floating-Point Arithmetic.
3580-------------------------------------------------------------------------------
3581*/
3582floatx80 floatx80_round_to_int( floatx80 a )
3583{
3584    flag aSign;
3585    int32 aExp;
3586    bits64 lastBitMask, roundBitsMask;
3587    int8 roundingMode;
3588    floatx80 z;
3589
3590    aExp = extractFloatx80Exp( a );
3591    if ( 0x403E <= aExp ) {
3592        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3593            return propagateFloatx80NaN( a, a );
3594        }
3595        return a;
3596    }
3597    if ( aExp < 0x3FFF ) {
3598        if (    ( aExp == 0 )
3599             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3600            return a;
3601        }
3602        float_set_inexact();
3603        aSign = extractFloatx80Sign( a );
3604        switch ( float_rounding_mode() ) {
3605         case float_round_nearest_even:
3606            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3607               ) {
3608                return
3609                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3610            }
3611            break;
3612         case float_round_down:
3613            return
3614                  aSign ?
3615                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3616                : packFloatx80( 0, 0, 0 );
3617         case float_round_up:
3618            return
3619                  aSign ? packFloatx80( 1, 0, 0 )
3620                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3621        }
3622        return packFloatx80( aSign, 0, 0 );
3623    }
3624    lastBitMask = 1;
3625    lastBitMask <<= 0x403E - aExp;
3626    roundBitsMask = lastBitMask - 1;
3627    z = a;
3628    roundingMode = float_rounding_mode();
3629    if ( roundingMode == float_round_nearest_even ) {
3630        z.low += lastBitMask>>1;
3631        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3632    }
3633    else if ( roundingMode != float_round_to_zero ) {
3634        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3635            z.low += roundBitsMask;
3636        }
3637    }
3638    z.low &= ~ roundBitsMask;
3639    if ( z.low == 0 ) {
3640        ++z.high;
3641        z.low = LIT64( 0x8000000000000000 );
3642    }
3643    if ( z.low != a.low ) float_set_inexact();
3644    return z;
3645
3646}
3647
3648/*
3649-------------------------------------------------------------------------------
3650Returns the result of adding the absolute values of the extended double-
3651precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3652negated before being returned.  `zSign' is ignored if the result is a NaN.
3653The addition is performed according to the IEC/IEEE Standard for Binary
3654Floating-Point Arithmetic.
3655-------------------------------------------------------------------------------
3656*/
3657static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3658{
3659    int32 aExp, bExp, zExp;
3660    bits64 aSig, bSig, zSig0, zSig1;
3661    int32 expDiff;
3662
3663    aSig = extractFloatx80Frac( a );
3664    aExp = extractFloatx80Exp( a );
3665    bSig = extractFloatx80Frac( b );
3666    bExp = extractFloatx80Exp( b );
3667    expDiff = aExp - bExp;
3668    if ( 0 < expDiff ) {
3669        if ( aExp == 0x7FFF ) {
3670            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3671            return a;
3672        }
3673        if ( bExp == 0 ) --expDiff;
3674        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3675        zExp = aExp;
3676    }
3677    else if ( expDiff < 0 ) {
3678        if ( bExp == 0x7FFF ) {
3679            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3680            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3681        }
3682        if ( aExp == 0 ) ++expDiff;
3683        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3684        zExp = bExp;
3685    }
3686    else {
3687        if ( aExp == 0x7FFF ) {
3688            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3689                return propagateFloatx80NaN( a, b );
3690            }
3691            return a;
3692        }
3693        zSig1 = 0;
3694        zSig0 = aSig + bSig;
3695        if ( aExp == 0 ) {
3696            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3697            goto roundAndPack;
3698        }
3699        zExp = aExp;
3700        goto shiftRight1;
3701    }
3702    zSig0 = aSig + bSig;
3703    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3704 shiftRight1:
3705    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3706    zSig0 |= LIT64( 0x8000000000000000 );
3707    ++zExp;
3708 roundAndPack:
3709    return
3710        roundAndPackFloatx80(
3711            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3712
3713}
3714
3715/*
3716-------------------------------------------------------------------------------
3717Returns the result of subtracting the absolute values of the extended
3718double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3719difference is negated before being returned.  `zSign' is ignored if the
3720result is a NaN.  The subtraction is performed according to the IEC/IEEE
3721Standard for Binary Floating-Point Arithmetic.
3722-------------------------------------------------------------------------------
3723*/
3724static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3725{
3726    int32 aExp, bExp, zExp;
3727    bits64 aSig, bSig, zSig0, zSig1;
3728    int32 expDiff;
3729    floatx80 z;
3730
3731    aSig = extractFloatx80Frac( a );
3732    aExp = extractFloatx80Exp( a );
3733    bSig = extractFloatx80Frac( b );
3734    bExp = extractFloatx80Exp( b );
3735    expDiff = aExp - bExp;
3736    if ( 0 < expDiff ) goto aExpBigger;
3737    if ( expDiff < 0 ) goto bExpBigger;
3738    if ( aExp == 0x7FFF ) {
3739        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3740            return propagateFloatx80NaN( a, b );
3741        }
3742        float_raise( float_flag_invalid );
3743        z.low = floatx80_default_nan_low;
3744        z.high = floatx80_default_nan_high;
3745        return z;
3746    }
3747    if ( aExp == 0 ) {
3748        aExp = 1;
3749        bExp = 1;
3750    }
3751    zSig1 = 0;
3752    if ( bSig < aSig ) goto aBigger;
3753    if ( aSig < bSig ) goto bBigger;
3754    return packFloatx80( float_rounding_mode() == float_round_down, 0, 0 );
3755 bExpBigger:
3756    if ( bExp == 0x7FFF ) {
3757        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3758        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3759    }
3760    if ( aExp == 0 ) ++expDiff;
3761    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3762 bBigger:
3763    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3764    zExp = bExp;
3765    zSign ^= 1;
3766    goto normalizeRoundAndPack;
3767 aExpBigger:
3768    if ( aExp == 0x7FFF ) {
3769        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3770        return a;
3771    }
3772    if ( bExp == 0 ) --expDiff;
3773    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3774 aBigger:
3775    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3776    zExp = aExp;
3777 normalizeRoundAndPack:
3778    return
3779        normalizeRoundAndPackFloatx80(
3780            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3781
3782}
3783
3784/*
3785-------------------------------------------------------------------------------
3786Returns the result of adding the extended double-precision floating-point
3787values `a' and `b'.  The operation is performed according to the IEC/IEEE
3788Standard for Binary Floating-Point Arithmetic.
3789-------------------------------------------------------------------------------
3790*/
3791floatx80 floatx80_add( floatx80 a, floatx80 b )
3792{
3793    flag aSign, bSign;
3794
3795    aSign = extractFloatx80Sign( a );
3796    bSign = extractFloatx80Sign( b );
3797    if ( aSign == bSign ) {
3798        return addFloatx80Sigs( a, b, aSign );
3799    }
3800    else {
3801        return subFloatx80Sigs( a, b, aSign );
3802    }
3803
3804}
3805
3806/*
3807-------------------------------------------------------------------------------
3808Returns the result of subtracting the extended double-precision floating-
3809point values `a' and `b'.  The operation is performed according to the
3810IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3811-------------------------------------------------------------------------------
3812*/
3813floatx80 floatx80_sub( floatx80 a, floatx80 b )
3814{
3815    flag aSign, bSign;
3816
3817    aSign = extractFloatx80Sign( a );
3818    bSign = extractFloatx80Sign( b );
3819    if ( aSign == bSign ) {
3820        return subFloatx80Sigs( a, b, aSign );
3821    }
3822    else {
3823        return addFloatx80Sigs( a, b, aSign );
3824    }
3825
3826}
3827
3828/*
3829-------------------------------------------------------------------------------
3830Returns the result of multiplying the extended double-precision floating-
3831point values `a' and `b'.  The operation is performed according to the
3832IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3833-------------------------------------------------------------------------------
3834*/
3835floatx80 floatx80_mul( floatx80 a, floatx80 b )
3836{
3837    flag aSign, bSign, zSign;
3838    int32 aExp, bExp, zExp;
3839    bits64 aSig, bSig, zSig0, zSig1;
3840    floatx80 z;
3841
3842    aSig = extractFloatx80Frac( a );
3843    aExp = extractFloatx80Exp( a );
3844    aSign = extractFloatx80Sign( a );
3845    bSig = extractFloatx80Frac( b );
3846    bExp = extractFloatx80Exp( b );
3847    bSign = extractFloatx80Sign( b );
3848    zSign = aSign ^ bSign;
3849    if ( aExp == 0x7FFF ) {
3850        if (    (bits64) ( aSig<<1 )
3851             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3852            return propagateFloatx80NaN( a, b );
3853        }
3854        if ( ( bExp | bSig ) == 0 ) goto invalid;
3855        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3856    }
3857    if ( bExp == 0x7FFF ) {
3858        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3859        if ( ( aExp | aSig ) == 0 ) {
3860 invalid:
3861            float_raise( float_flag_invalid );
3862            z.low = floatx80_default_nan_low;
3863            z.high = floatx80_default_nan_high;
3864            return z;
3865        }
3866        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3867    }
3868    if ( aExp == 0 ) {
3869        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3870        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3871    }
3872    if ( bExp == 0 ) {
3873        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3874        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3875    }
3876    zExp = aExp + bExp - 0x3FFE;
3877    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3878    if ( 0 < (sbits64) zSig0 ) {
3879        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3880        --zExp;
3881    }
3882    return
3883        roundAndPackFloatx80(
3884            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3885
3886}
3887
3888/*
3889-------------------------------------------------------------------------------
3890Returns the result of dividing the extended double-precision floating-point
3891value `a' by the corresponding value `b'.  The operation is performed
3892according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3893-------------------------------------------------------------------------------
3894*/
3895floatx80 floatx80_div( floatx80 a, floatx80 b )
3896{
3897    flag aSign, bSign, zSign;
3898    int32 aExp, bExp, zExp;
3899    bits64 aSig, bSig, zSig0, zSig1;
3900    bits64 rem0, rem1, rem2, term0, term1, term2;
3901    floatx80 z;
3902
3903    aSig = extractFloatx80Frac( a );
3904    aExp = extractFloatx80Exp( a );
3905    aSign = extractFloatx80Sign( a );
3906    bSig = extractFloatx80Frac( b );
3907    bExp = extractFloatx80Exp( b );
3908    bSign = extractFloatx80Sign( b );
3909    zSign = aSign ^ bSign;
3910    if ( aExp == 0x7FFF ) {
3911        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3912        if ( bExp == 0x7FFF ) {
3913            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3914            goto invalid;
3915        }
3916        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3917    }
3918    if ( bExp == 0x7FFF ) {
3919        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3920        return packFloatx80( zSign, 0, 0 );
3921    }
3922    if ( bExp == 0 ) {
3923        if ( bSig == 0 ) {
3924            if ( ( aExp | aSig ) == 0 ) {
3925 invalid:
3926                float_raise( float_flag_invalid );
3927                z.low = floatx80_default_nan_low;
3928                z.high = floatx80_default_nan_high;
3929                return z;
3930            }
3931            float_raise( float_flag_divbyzero );
3932            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3933        }
3934        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3935    }
3936    if ( aExp == 0 ) {
3937        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3938        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3939    }
3940    zExp = aExp - bExp + 0x3FFE;
3941    rem1 = 0;
3942    if ( bSig <= aSig ) {
3943        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3944        ++zExp;
3945    }
3946    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3947    mul64To128( bSig, zSig0, &term0, &term1 );
3948    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3949    while ( (sbits64) rem0 < 0 ) {
3950        --zSig0;
3951        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3952    }
3953    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3954    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3955        mul64To128( bSig, zSig1, &term1, &term2 );
3956        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3957        while ( (sbits64) rem1 < 0 ) {
3958            --zSig1;
3959            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3960        }
3961        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3962    }
3963    return
3964        roundAndPackFloatx80(
3965            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3966
3967}
3968
3969/*
3970-------------------------------------------------------------------------------
3971Returns the remainder of the extended double-precision floating-point value
3972`a' with respect to the corresponding value `b'.  The operation is performed
3973according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3974-------------------------------------------------------------------------------
3975*/
3976floatx80 floatx80_rem( floatx80 a, floatx80 b )
3977{
3978    flag aSign, bSign, zSign;
3979    int32 aExp, bExp, expDiff;
3980    bits64 aSig0, aSig1, bSig;
3981    bits64 q, term0, term1, alternateASig0, alternateASig1;
3982    floatx80 z;
3983
3984    aSig0 = extractFloatx80Frac( a );
3985    aExp = extractFloatx80Exp( a );
3986    aSign = extractFloatx80Sign( a );
3987    bSig = extractFloatx80Frac( b );
3988    bExp = extractFloatx80Exp( b );
3989    bSign = extractFloatx80Sign( b );
3990    if ( aExp == 0x7FFF ) {
3991        if (    (bits64) ( aSig0<<1 )
3992             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3993            return propagateFloatx80NaN( a, b );
3994        }
3995        goto invalid;
3996    }
3997    if ( bExp == 0x7FFF ) {
3998        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3999        return a;
4000    }
4001    if ( bExp == 0 ) {
4002        if ( bSig == 0 ) {
4003 invalid:
4004            float_raise( float_flag_invalid );
4005            z.low = floatx80_default_nan_low;
4006            z.high = floatx80_default_nan_high;
4007            return z;
4008        }
4009        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
4010    }
4011    if ( aExp == 0 ) {
4012        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
4013        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4014    }
4015    bSig |= LIT64( 0x8000000000000000 );
4016    zSign = aSign;
4017    expDiff = aExp - bExp;
4018    aSig1 = 0;
4019    if ( expDiff < 0 ) {
4020        if ( expDiff < -1 ) return a;
4021        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
4022        expDiff = 0;
4023    }
4024    q = ( bSig <= aSig0 );
4025    if ( q ) aSig0 -= bSig;
4026    expDiff -= 64;
4027    while ( 0 < expDiff ) {
4028        q = estimateDiv128To64( aSig0, aSig1, bSig );
4029        q = ( 2 < q ) ? q - 2 : 0;
4030        mul64To128( bSig, q, &term0, &term1 );
4031        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4032        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
4033        expDiff -= 62;
4034    }
4035    expDiff += 64;
4036    if ( 0 < expDiff ) {
4037        q = estimateDiv128To64( aSig0, aSig1, bSig );
4038        q = ( 2 < q ) ? q - 2 : 0;
4039        q >>= 64 - expDiff;
4040        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4041        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4042        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4043        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4044            ++q;
4045            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4046        }
4047    }
4048    else {
4049        term1 = 0;
4050        term0 = bSig;
4051    }
4052    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4053    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4054         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4055              && ( q & 1 ) )
4056       ) {
4057        aSig0 = alternateASig0;
4058        aSig1 = alternateASig1;
4059        zSign = ! zSign;
4060    }
4061    return
4062        normalizeRoundAndPackFloatx80(
4063            80, zSign, bExp + expDiff, aSig0, aSig1 );
4064
4065}
4066
4067/*
4068-------------------------------------------------------------------------------
4069Returns the square root of the extended double-precision floating-point
4070value `a'.  The operation is performed according to the IEC/IEEE Standard
4071for Binary Floating-Point Arithmetic.
4072-------------------------------------------------------------------------------
4073*/
4074floatx80 floatx80_sqrt( floatx80 a )
4075{
4076    flag aSign;
4077    int32 aExp, zExp;
4078    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4079    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4080    floatx80 z;
4081
4082    aSig0 = extractFloatx80Frac( a );
4083    aExp = extractFloatx80Exp( a );
4084    aSign = extractFloatx80Sign( a );
4085    if ( aExp == 0x7FFF ) {
4086        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4087        if ( ! aSign ) return a;
4088        goto invalid;
4089    }
4090    if ( aSign ) {
4091        if ( ( aExp | aSig0 ) == 0 ) return a;
4092 invalid:
4093        float_raise( float_flag_invalid );
4094        z.low = floatx80_default_nan_low;
4095        z.high = floatx80_default_nan_high;
4096        return z;
4097    }
4098    if ( aExp == 0 ) {
4099        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4100        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4101    }
4102    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4103    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4104    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4105    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4106    doubleZSig0 = zSig0<<1;
4107    mul64To128( zSig0, zSig0, &term0, &term1 );
4108    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4109    while ( (sbits64) rem0 < 0 ) {
4110        --zSig0;
4111        doubleZSig0 -= 2;
4112        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4113    }
4114    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4115    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4116        if ( zSig1 == 0 ) zSig1 = 1;
4117        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4118        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4119        mul64To128( zSig1, zSig1, &term2, &term3 );
4120        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4121        while ( (sbits64) rem1 < 0 ) {
4122            --zSig1;
4123            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4124            term3 |= 1;
4125            term2 |= doubleZSig0;
4126            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4127        }
4128        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4129    }
4130    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4131    zSig0 |= doubleZSig0;
4132    return
4133        roundAndPackFloatx80(
4134            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4135
4136}
4137
4138/*
4139-------------------------------------------------------------------------------
4140Returns 1 if the extended double-precision floating-point value `a' is
4141equal to the corresponding value `b', and 0 otherwise.  The comparison is
4142performed according to the IEC/IEEE Standard for Binary Floating-Point
4143Arithmetic.
4144-------------------------------------------------------------------------------
4145*/
4146flag floatx80_eq( floatx80 a, floatx80 b )
4147{
4148
4149    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4150              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4151         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4152              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4153       ) {
4154        if (    floatx80_is_signaling_nan( a )
4155             || floatx80_is_signaling_nan( b ) ) {
4156            float_raise( float_flag_invalid );
4157        }
4158        return 0;
4159    }
4160    return
4161           ( a.low == b.low )
4162        && (    ( a.high == b.high )
4163             || (    ( a.low == 0 )
4164                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4165           );
4166
4167}
4168
4169/*
4170-------------------------------------------------------------------------------
4171Returns 1 if the extended double-precision floating-point value `a' is
4172less than or equal to the corresponding value `b', and 0 otherwise.  The
4173comparison is performed according to the IEC/IEEE Standard for Binary
4174Floating-Point Arithmetic.
4175-------------------------------------------------------------------------------
4176*/
4177flag floatx80_le( floatx80 a, floatx80 b )
4178{
4179    flag aSign, bSign;
4180
4181    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4182              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4183         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4184              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4185       ) {
4186        float_raise( float_flag_invalid );
4187        return 0;
4188    }
4189    aSign = extractFloatx80Sign( a );
4190    bSign = extractFloatx80Sign( b );
4191    if ( aSign != bSign ) {
4192        return
4193               aSign
4194            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4195                 == 0 );
4196    }
4197    return
4198          aSign ? le128( b.high, b.low, a.high, a.low )
4199        : le128( a.high, a.low, b.high, b.low );
4200
4201}
4202
4203/*
4204-------------------------------------------------------------------------------
4205Returns 1 if the extended double-precision floating-point value `a' is
4206less than the corresponding value `b', and 0 otherwise.  The comparison
4207is performed according to the IEC/IEEE Standard for Binary Floating-Point
4208Arithmetic.
4209-------------------------------------------------------------------------------
4210*/
4211flag floatx80_lt( floatx80 a, floatx80 b )
4212{
4213    flag aSign, bSign;
4214
4215    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4216              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4217         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4218              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4219       ) {
4220        float_raise( float_flag_invalid );
4221        return 0;
4222    }
4223    aSign = extractFloatx80Sign( a );
4224    bSign = extractFloatx80Sign( b );
4225    if ( aSign != bSign ) {
4226        return
4227               aSign
4228            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4229                 != 0 );
4230    }
4231    return
4232          aSign ? lt128( b.high, b.low, a.high, a.low )
4233        : lt128( a.high, a.low, b.high, b.low );
4234
4235}
4236
4237/*
4238-------------------------------------------------------------------------------
4239Returns 1 if the extended double-precision floating-point value `a' is equal
4240to the corresponding value `b', and 0 otherwise.  The invalid exception is
4241raised if either operand is a NaN.  Otherwise, the comparison is performed
4242according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4243-------------------------------------------------------------------------------
4244*/
4245flag floatx80_eq_signaling( floatx80 a, floatx80 b )
4246{
4247
4248    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4249              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4250         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4251              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4252       ) {
4253        float_raise( float_flag_invalid );
4254        return 0;
4255    }
4256    return
4257           ( a.low == b.low )
4258        && (    ( a.high == b.high )
4259             || (    ( a.low == 0 )
4260                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4261           );
4262
4263}
4264
4265/*
4266-------------------------------------------------------------------------------
4267Returns 1 if the extended double-precision floating-point value `a' is less
4268than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4269do not cause an exception.  Otherwise, the comparison is performed according
4270to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4271-------------------------------------------------------------------------------
4272*/
4273flag floatx80_le_quiet( floatx80 a, floatx80 b )
4274{
4275    flag aSign, bSign;
4276
4277    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4278              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4279         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4280              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4281       ) {
4282        if (    floatx80_is_signaling_nan( a )
4283             || floatx80_is_signaling_nan( b ) ) {
4284            float_raise( float_flag_invalid );
4285        }
4286        return 0;
4287    }
4288    aSign = extractFloatx80Sign( a );
4289    bSign = extractFloatx80Sign( b );
4290    if ( aSign != bSign ) {
4291        return
4292               aSign
4293            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4294                 == 0 );
4295    }
4296    return
4297          aSign ? le128( b.high, b.low, a.high, a.low )
4298        : le128( a.high, a.low, b.high, b.low );
4299
4300}
4301
4302/*
4303-------------------------------------------------------------------------------
4304Returns 1 if the extended double-precision floating-point value `a' is less
4305than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4306an exception.  Otherwise, the comparison is performed according to the
4307IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4308-------------------------------------------------------------------------------
4309*/
4310flag floatx80_lt_quiet( floatx80 a, floatx80 b )
4311{
4312    flag aSign, bSign;
4313
4314    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4315              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4316         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4317              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4318       ) {
4319        if (    floatx80_is_signaling_nan( a )
4320             || floatx80_is_signaling_nan( b ) ) {
4321            float_raise( float_flag_invalid );
4322        }
4323        return 0;
4324    }
4325    aSign = extractFloatx80Sign( a );
4326    bSign = extractFloatx80Sign( b );
4327    if ( aSign != bSign ) {
4328        return
4329               aSign
4330            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4331                 != 0 );
4332    }
4333    return
4334          aSign ? lt128( b.high, b.low, a.high, a.low )
4335        : lt128( a.high, a.low, b.high, b.low );
4336
4337}
4338
4339#endif
4340
4341#ifdef FLOAT128
4342
4343/*
4344-------------------------------------------------------------------------------
4345Returns the result of converting the quadruple-precision floating-point
4346value `a' to the 32-bit two's complement integer format.  The conversion
4347is performed according to the IEC/IEEE Standard for Binary Floating-Point
4348Arithmetic---which means in particular that the conversion is rounded
4349according to the current rounding mode.  If `a' is a NaN, the largest
4350positive integer is returned.  Otherwise, if the conversion overflows, the
4351largest integer with the same sign as `a' is returned.
4352-------------------------------------------------------------------------------
4353*/
4354int32 float128_to_int32( float128 a )
4355{
4356    flag aSign;
4357    int32 aExp, shiftCount;
4358    bits64 aSig0, aSig1;
4359
4360    aSig1 = extractFloat128Frac1( a );
4361    aSig0 = extractFloat128Frac0( a );
4362    aExp = extractFloat128Exp( a );
4363    aSign = extractFloat128Sign( a );
4364    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4365    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4366    aSig0 |= ( aSig1 != 0 );
4367    shiftCount = 0x4028 - aExp;
4368    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4369    return roundAndPackInt32( aSign, aSig0 );
4370
4371}
4372
4373/*
4374-------------------------------------------------------------------------------
4375Returns the result of converting the quadruple-precision floating-point
4376value `a' to the 32-bit two's complement integer format.  The conversion
4377is performed according to the IEC/IEEE Standard for Binary Floating-Point
4378Arithmetic, except that the conversion is always rounded toward zero.  If
4379`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4380conversion overflows, the largest integer with the same sign as `a' is
4381returned.
4382-------------------------------------------------------------------------------
4383*/
4384int32 float128_to_int32_round_to_zero( float128 a )
4385{
4386    flag aSign;
4387    int32 aExp, shiftCount;
4388    bits64 aSig0, aSig1, savedASig;
4389    int32 z;
4390
4391    aSig1 = extractFloat128Frac1( a );
4392    aSig0 = extractFloat128Frac0( a );
4393    aExp = extractFloat128Exp( a );
4394    aSign = extractFloat128Sign( a );
4395    aSig0 |= ( aSig1 != 0 );
4396    if ( 0x401E < aExp ) {
4397        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4398        goto invalid;
4399    }
4400    else if ( aExp < 0x3FFF ) {
4401        if ( aExp || aSig0 ) float_set_inexact();
4402        return 0;
4403    }
4404    aSig0 |= LIT64( 0x0001000000000000 );
4405    shiftCount = 0x402F - aExp;
4406    savedASig = aSig0;
4407    aSig0 >>= shiftCount;
4408    z = aSig0;
4409    if ( aSign ) z = - z;
4410    if ( ( z < 0 ) ^ aSign ) {
4411 invalid:
4412        float_raise( float_flag_invalid );
4413        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4414    }
4415    if ( ( aSig0<<shiftCount ) != savedASig ) {
4416        float_set_inexact();
4417    }
4418    return z;
4419
4420}
4421
4422/*
4423-------------------------------------------------------------------------------
4424Returns the result of converting the quadruple-precision floating-point
4425value `a' to the 64-bit two's complement integer format.  The conversion
4426is performed according to the IEC/IEEE Standard for Binary Floating-Point
4427Arithmetic---which means in particular that the conversion is rounded
4428according to the current rounding mode.  If `a' is a NaN, the largest
4429positive integer is returned.  Otherwise, if the conversion overflows, the
4430largest integer with the same sign as `a' is returned.
4431-------------------------------------------------------------------------------
4432*/
4433int64 float128_to_int64( float128 a )
4434{
4435    flag aSign;
4436    int32 aExp, shiftCount;
4437    bits64 aSig0, aSig1;
4438
4439    aSig1 = extractFloat128Frac1( a );
4440    aSig0 = extractFloat128Frac0( a );
4441    aExp = extractFloat128Exp( a );
4442    aSign = extractFloat128Sign( a );
4443    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4444    shiftCount = 0x402F - aExp;
4445    if ( shiftCount <= 0 ) {
4446        if ( 0x403E < aExp ) {
4447            float_raise( float_flag_invalid );
4448            if (    ! aSign
4449                 || (    ( aExp == 0x7FFF )
4450                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4451                    )
4452               ) {
4453                return LIT64( 0x7FFFFFFFFFFFFFFF );
4454            }
4455            return (sbits64) LIT64( 0x8000000000000000 );
4456        }
4457        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4458    }
4459    else {
4460        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4461    }
4462    return roundAndPackInt64( aSign, aSig0, aSig1 );
4463
4464}
4465
4466/*
4467-------------------------------------------------------------------------------
4468Returns the result of converting the quadruple-precision floating-point
4469value `a' to the 64-bit two's complement integer format.  The conversion
4470is performed according to the IEC/IEEE Standard for Binary Floating-Point
4471Arithmetic, except that the conversion is always rounded toward zero.
4472If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4473the conversion overflows, the largest integer with the same sign as `a' is
4474returned.
4475-------------------------------------------------------------------------------
4476*/
4477int64 float128_to_int64_round_to_zero( float128 a )
4478{
4479    flag aSign;
4480    int32 aExp, shiftCount;
4481    bits64 aSig0, aSig1;
4482    int64 z;
4483
4484    aSig1 = extractFloat128Frac1( a );
4485    aSig0 = extractFloat128Frac0( a );
4486    aExp = extractFloat128Exp( a );
4487    aSign = extractFloat128Sign( a );
4488    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4489    shiftCount = aExp - 0x402F;
4490    if ( 0 < shiftCount ) {
4491        if ( 0x403E <= aExp ) {
4492            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4493            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4494                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4495                if ( aSig1 ) float_set_inexact();
4496            }
4497            else {
4498                float_raise( float_flag_invalid );
4499                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4500                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4501                }
4502            }
4503            return (sbits64) LIT64( 0x8000000000000000 );
4504        }
4505        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4506        if ( (bits64) ( aSig1<<shiftCount ) ) {
4507            float_set_inexact();
4508        }
4509    }
4510    else {
4511        if ( aExp < 0x3FFF ) {
4512            if ( aExp | aSig0 | aSig1 ) {
4513                float_set_inexact();
4514            }
4515            return 0;
4516        }
4517        z = aSig0>>( - shiftCount );
4518        if (    aSig1
4519             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4520            float_set_inexact();
4521        }
4522    }
4523    if ( aSign ) z = - z;
4524    return z;
4525
4526}
4527
4528/*
4529-------------------------------------------------------------------------------
4530Returns the result of converting the quadruple-precision floating-point
4531value `a' to the single-precision floating-point format.  The conversion
4532is performed according to the IEC/IEEE Standard for Binary Floating-Point
4533Arithmetic.
4534-------------------------------------------------------------------------------
4535*/
4536float32 float128_to_float32( float128 a )
4537{
4538    flag aSign;
4539    int32 aExp;
4540    bits64 aSig0, aSig1;
4541    bits32 zSig;
4542
4543    aSig1 = extractFloat128Frac1( a );
4544    aSig0 = extractFloat128Frac0( a );
4545    aExp = extractFloat128Exp( a );
4546    aSign = extractFloat128Sign( a );
4547    if ( aExp == 0x7FFF ) {
4548        if ( aSig0 | aSig1 ) {
4549            return commonNaNToFloat32( float128ToCommonNaN( a ) );
4550        }
4551        return packFloat32( aSign, 0xFF, 0 );
4552    }
4553    aSig0 |= ( aSig1 != 0 );
4554    shift64RightJamming( aSig0, 18, &aSig0 );
4555    zSig = aSig0;
4556    if ( aExp || zSig ) {
4557        zSig |= 0x40000000;
4558        aExp -= 0x3F81;
4559    }
4560    return roundAndPackFloat32( aSign, aExp, zSig );
4561
4562}
4563
4564/*
4565-------------------------------------------------------------------------------
4566Returns the result of converting the quadruple-precision floating-point
4567value `a' to the double-precision floating-point format.  The conversion
4568is performed according to the IEC/IEEE Standard for Binary Floating-Point
4569Arithmetic.
4570-------------------------------------------------------------------------------
4571*/
4572float64 float128_to_float64( float128 a )
4573{
4574    flag aSign;
4575    int32 aExp;
4576    bits64 aSig0, aSig1;
4577
4578    aSig1 = extractFloat128Frac1( a );
4579    aSig0 = extractFloat128Frac0( a );
4580    aExp = extractFloat128Exp( a );
4581    aSign = extractFloat128Sign( a );
4582    if ( aExp == 0x7FFF ) {
4583        if ( aSig0 | aSig1 ) {
4584            return commonNaNToFloat64( float128ToCommonNaN( a ) );
4585        }
4586        return packFloat64( aSign, 0x7FF, 0 );
4587    }
4588    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4589    aSig0 |= ( aSig1 != 0 );
4590    if ( aExp || aSig0 ) {
4591        aSig0 |= LIT64( 0x4000000000000000 );
4592        aExp -= 0x3C01;
4593    }
4594    return roundAndPackFloat64( aSign, aExp, aSig0 );
4595
4596}
4597
4598#ifdef FLOATX80
4599
4600/*
4601-------------------------------------------------------------------------------
4602Returns the result of converting the quadruple-precision floating-point
4603value `a' to the extended double-precision floating-point format.  The
4604conversion is performed according to the IEC/IEEE Standard for Binary
4605Floating-Point Arithmetic.
4606-------------------------------------------------------------------------------
4607*/
4608floatx80 float128_to_floatx80( float128 a )
4609{
4610    flag aSign;
4611    int32 aExp;
4612    bits64 aSig0, aSig1;
4613
4614    aSig1 = extractFloat128Frac1( a );
4615    aSig0 = extractFloat128Frac0( a );
4616    aExp = extractFloat128Exp( a );
4617    aSign = extractFloat128Sign( a );
4618    if ( aExp == 0x7FFF ) {
4619        if ( aSig0 | aSig1 ) {
4620            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4621        }
4622        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4623    }
4624    if ( aExp == 0 ) {
4625        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4626        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4627    }
4628    else {
4629        aSig0 |= LIT64( 0x0001000000000000 );
4630    }
4631    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4632    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4633
4634}
4635
4636#endif
4637
4638/*
4639-------------------------------------------------------------------------------
4640Rounds the quadruple-precision floating-point value `a' to an integer, and
4641returns the result as a quadruple-precision floating-point value.  The
4642operation is performed according to the IEC/IEEE Standard for Binary
4643Floating-Point Arithmetic.
4644-------------------------------------------------------------------------------
4645*/
4646float128 float128_round_to_int( float128 a )
4647{
4648    flag aSign;
4649    int32 aExp;
4650    bits64 lastBitMask, roundBitsMask;
4651    int8 roundingMode;
4652    float128 z;
4653
4654    aExp = extractFloat128Exp( a );
4655    if ( 0x402F <= aExp ) {
4656        if ( 0x406F <= aExp ) {
4657            if (    ( aExp == 0x7FFF )
4658                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4659               ) {
4660                return propagateFloat128NaN( a, a );
4661            }
4662            return a;
4663        }
4664        lastBitMask = 1;
4665        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4666        roundBitsMask = lastBitMask - 1;
4667        z = a;
4668        roundingMode = float_rounding_mode();
4669        if ( roundingMode == float_round_nearest_even ) {
4670            if ( lastBitMask ) {
4671                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4672                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4673            }
4674            else {
4675                if ( (sbits64) z.low < 0 ) {
4676                    ++z.high;
4677                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4678                }
4679            }
4680        }
4681        else if ( roundingMode != float_round_to_zero ) {
4682            if (   extractFloat128Sign( z )
4683                 ^ ( roundingMode == float_round_up ) ) {
4684                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4685            }
4686        }
4687        z.low &= ~ roundBitsMask;
4688    }
4689    else {
4690        if ( aExp < 0x3FFF ) {
4691            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4692            float_set_inexact();
4693            aSign = extractFloat128Sign( a );
4694            switch ( float_rounding_mode() ) {
4695             case float_round_nearest_even:
4696                if (    ( aExp == 0x3FFE )
4697                     && (   extractFloat128Frac0( a )
4698                          | extractFloat128Frac1( a ) )
4699                   ) {
4700                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4701                }
4702                break;
4703             case float_round_down:
4704                return
4705                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4706                    : packFloat128( 0, 0, 0, 0 );
4707             case float_round_up:
4708                return
4709                      aSign ? packFloat128( 1, 0, 0, 0 )
4710                    : packFloat128( 0, 0x3FFF, 0, 0 );
4711            }
4712            return packFloat128( aSign, 0, 0, 0 );
4713        }
4714        lastBitMask = 1;
4715        lastBitMask <<= 0x402F - aExp;
4716        roundBitsMask = lastBitMask - 1;
4717        z.low = 0;
4718        z.high = a.high;
4719        roundingMode = float_rounding_mode();
4720        if ( roundingMode == float_round_nearest_even ) {
4721            z.high += lastBitMask>>1;
4722            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4723                z.high &= ~ lastBitMask;
4724            }
4725        }
4726        else if ( roundingMode != float_round_to_zero ) {
4727            if (   extractFloat128Sign( z )
4728                 ^ ( roundingMode == float_round_up ) ) {
4729                z.high |= ( a.low != 0 );
4730                z.high += roundBitsMask;
4731            }
4732        }
4733        z.high &= ~ roundBitsMask;
4734    }
4735    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4736        float_set_inexact();
4737    }
4738    return z;
4739
4740}
4741
4742/*
4743-------------------------------------------------------------------------------
4744Returns the result of adding the absolute values of the quadruple-precision
4745floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4746before being returned.  `zSign' is ignored if the result is a NaN.
4747The addition is performed according to the IEC/IEEE Standard for Binary
4748Floating-Point Arithmetic.
4749-------------------------------------------------------------------------------
4750*/
4751static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4752{
4753    int32 aExp, bExp, zExp;
4754    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4755    int32 expDiff;
4756
4757    aSig1 = extractFloat128Frac1( a );
4758    aSig0 = extractFloat128Frac0( a );
4759    aExp = extractFloat128Exp( a );
4760    bSig1 = extractFloat128Frac1( b );
4761    bSig0 = extractFloat128Frac0( b );
4762    bExp = extractFloat128Exp( b );
4763    expDiff = aExp - bExp;
4764    if ( 0 < expDiff ) {
4765        if ( aExp == 0x7FFF ) {
4766            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4767            return a;
4768        }
4769        if ( bExp == 0 ) {
4770            --expDiff;
4771        }
4772        else {
4773            bSig0 |= LIT64( 0x0001000000000000 );
4774        }
4775        shift128ExtraRightJamming(
4776            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4777        zExp = aExp;
4778    }
4779    else if ( expDiff < 0 ) {
4780        if ( bExp == 0x7FFF ) {
4781            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4782            return packFloat128( zSign, 0x7FFF, 0, 0 );
4783        }
4784        if ( aExp == 0 ) {
4785            ++expDiff;
4786        }
4787        else {
4788            aSig0 |= LIT64( 0x0001000000000000 );
4789        }
4790        shift128ExtraRightJamming(
4791            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4792        zExp = bExp;
4793    }
4794    else {
4795        if ( aExp == 0x7FFF ) {
4796            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4797                return propagateFloat128NaN( a, b );
4798            }
4799            return a;
4800        }
4801        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4802        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4803        zSig2 = 0;
4804        zSig0 |= LIT64( 0x0002000000000000 );
4805        zExp = aExp;
4806        goto shiftRight1;
4807    }
4808    aSig0 |= LIT64( 0x0001000000000000 );
4809    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4810    --zExp;
4811    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4812    ++zExp;
4813 shiftRight1:
4814    shift128ExtraRightJamming(
4815        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4816 roundAndPack:
4817    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4818
4819}
4820
4821/*
4822-------------------------------------------------------------------------------
4823Returns the result of subtracting the absolute values of the quadruple-
4824precision floating-point values `a' and `b'.  If `zSign' is 1, the
4825difference is negated before being returned.  `zSign' is ignored if the
4826result is a NaN.  The subtraction is performed according to the IEC/IEEE
4827Standard for Binary Floating-Point Arithmetic.
4828-------------------------------------------------------------------------------
4829*/
4830static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4831{
4832    int32 aExp, bExp, zExp;
4833    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4834    int32 expDiff;
4835    float128 z;
4836
4837    aSig1 = extractFloat128Frac1( a );
4838    aSig0 = extractFloat128Frac0( a );
4839    aExp = extractFloat128Exp( a );
4840    bSig1 = extractFloat128Frac1( b );
4841    bSig0 = extractFloat128Frac0( b );
4842    bExp = extractFloat128Exp( b );
4843    expDiff = aExp - bExp;
4844    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4845    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4846    if ( 0 < expDiff ) goto aExpBigger;
4847    if ( expDiff < 0 ) goto bExpBigger;
4848    if ( aExp == 0x7FFF ) {
4849        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4850            return propagateFloat128NaN( a, b );
4851        }
4852        float_raise( float_flag_invalid );
4853        z.low = float128_default_nan_low;
4854        z.high = float128_default_nan_high;
4855        return z;
4856    }
4857    if ( aExp == 0 ) {
4858        aExp = 1;
4859        bExp = 1;
4860    }
4861    if ( bSig0 < aSig0 ) goto aBigger;
4862    if ( aSig0 < bSig0 ) goto bBigger;
4863    if ( bSig1 < aSig1 ) goto aBigger;
4864    if ( aSig1 < bSig1 ) goto bBigger;
4865    return packFloat128( float_rounding_mode() == float_round_down, 0, 0, 0 );
4866 bExpBigger:
4867    if ( bExp == 0x7FFF ) {
4868        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4869        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4870    }
4871    if ( aExp == 0 ) {
4872        ++expDiff;
4873    }
4874    else {
4875        aSig0 |= LIT64( 0x4000000000000000 );
4876    }
4877    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4878    bSig0 |= LIT64( 0x4000000000000000 );
4879 bBigger:
4880    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4881    zExp = bExp;
4882    zSign ^= 1;
4883    goto normalizeRoundAndPack;
4884 aExpBigger:
4885    if ( aExp == 0x7FFF ) {
4886        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4887        return a;
4888    }
4889    if ( bExp == 0 ) {
4890        --expDiff;
4891    }
4892    else {
4893        bSig0 |= LIT64( 0x4000000000000000 );
4894    }
4895    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4896    aSig0 |= LIT64( 0x4000000000000000 );
4897 aBigger:
4898    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4899    zExp = aExp;
4900 normalizeRoundAndPack:
4901    --zExp;
4902    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4903
4904}
4905
4906/*
4907-------------------------------------------------------------------------------
4908Returns the result of adding the quadruple-precision floating-point values
4909`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4910for Binary Floating-Point Arithmetic.
4911-------------------------------------------------------------------------------
4912*/
4913float128 float128_add( float128 a, float128 b )
4914{
4915    flag aSign, bSign;
4916
4917    aSign = extractFloat128Sign( a );
4918    bSign = extractFloat128Sign( b );
4919    if ( aSign == bSign ) {
4920        return addFloat128Sigs( a, b, aSign );
4921    }
4922    else {
4923        return subFloat128Sigs( a, b, aSign );
4924    }
4925
4926}
4927
4928/*
4929-------------------------------------------------------------------------------
4930Returns the result of subtracting the quadruple-precision floating-point
4931values `a' and `b'.  The operation is performed according to the IEC/IEEE
4932Standard for Binary Floating-Point Arithmetic.
4933-------------------------------------------------------------------------------
4934*/
4935float128 float128_sub( float128 a, float128 b )
4936{
4937    flag aSign, bSign;
4938
4939    aSign = extractFloat128Sign( a );
4940    bSign = extractFloat128Sign( b );
4941    if ( aSign == bSign ) {
4942        return subFloat128Sigs( a, b, aSign );
4943    }
4944    else {
4945        return addFloat128Sigs( a, b, aSign );
4946    }
4947
4948}
4949
4950/*
4951-------------------------------------------------------------------------------
4952Returns the result of multiplying the quadruple-precision floating-point
4953values `a' and `b'.  The operation is performed according to the IEC/IEEE
4954Standard for Binary Floating-Point Arithmetic.
4955-------------------------------------------------------------------------------
4956*/
4957float128 float128_mul( float128 a, float128 b )
4958{
4959    flag aSign, bSign, zSign;
4960    int32 aExp, bExp, zExp;
4961    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4962    float128 z;
4963
4964    aSig1 = extractFloat128Frac1( a );
4965    aSig0 = extractFloat128Frac0( a );
4966    aExp = extractFloat128Exp( a );
4967    aSign = extractFloat128Sign( a );
4968    bSig1 = extractFloat128Frac1( b );
4969    bSig0 = extractFloat128Frac0( b );
4970    bExp = extractFloat128Exp( b );
4971    bSign = extractFloat128Sign( b );
4972    zSign = aSign ^ bSign;
4973    if ( aExp == 0x7FFF ) {
4974        if (    ( aSig0 | aSig1 )
4975             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4976            return propagateFloat128NaN( a, b );
4977        }
4978        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4979        return packFloat128( zSign, 0x7FFF, 0, 0 );
4980    }
4981    if ( bExp == 0x7FFF ) {
4982        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4983        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4984 invalid:
4985            float_raise( float_flag_invalid );
4986            z.low = float128_default_nan_low;
4987            z.high = float128_default_nan_high;
4988            return z;
4989        }
4990        return packFloat128( zSign, 0x7FFF, 0, 0 );
4991    }
4992    if ( aExp == 0 ) {
4993        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4994        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4995    }
4996    if ( bExp == 0 ) {
4997        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4998        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4999    }
5000    zExp = aExp + bExp - 0x4000;
5001    aSig0 |= LIT64( 0x0001000000000000 );
5002    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5003    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5004    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5005    zSig2 |= ( zSig3 != 0 );
5006    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5007        shift128ExtraRightJamming(
5008            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5009        ++zExp;
5010    }
5011    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5012
5013}
5014
5015/*
5016-------------------------------------------------------------------------------
5017Returns the result of dividing the quadruple-precision floating-point value
5018`a' by the corresponding value `b'.  The operation is performed according to
5019the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5020-------------------------------------------------------------------------------
5021*/
5022float128 float128_div( float128 a, float128 b )
5023{
5024    flag aSign, bSign, zSign;
5025    int32 aExp, bExp, zExp;
5026    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5027    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5028    float128 z;
5029
5030    aSig1 = extractFloat128Frac1( a );
5031    aSig0 = extractFloat128Frac0( a );
5032    aExp = extractFloat128Exp( a );
5033    aSign = extractFloat128Sign( a );
5034    bSig1 = extractFloat128Frac1( b );
5035    bSig0 = extractFloat128Frac0( b );
5036    bExp = extractFloat128Exp( b );
5037    bSign = extractFloat128Sign( b );
5038    zSign = aSign ^ bSign;
5039    if ( aExp == 0x7FFF ) {
5040        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
5041        if ( bExp == 0x7FFF ) {
5042            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5043            goto invalid;
5044        }
5045        return packFloat128( zSign, 0x7FFF, 0, 0 );
5046    }
5047    if ( bExp == 0x7FFF ) {
5048        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5049        return packFloat128( zSign, 0, 0, 0 );
5050    }
5051    if ( bExp == 0 ) {
5052        if ( ( bSig0 | bSig1 ) == 0 ) {
5053            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5054 invalid:
5055                float_raise( float_flag_invalid );
5056                z.low = float128_default_nan_low;
5057                z.high = float128_default_nan_high;
5058                return z;
5059            }
5060            float_raise( float_flag_divbyzero );
5061            return packFloat128( zSign, 0x7FFF, 0, 0 );
5062        }
5063        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5064    }
5065    if ( aExp == 0 ) {
5066        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5067        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5068    }
5069    zExp = aExp - bExp + 0x3FFD;
5070    shortShift128Left(
5071        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5072    shortShift128Left(
5073        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5074    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5075        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5076        ++zExp;
5077    }
5078    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5079    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5080    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5081    while ( (sbits64) rem0 < 0 ) {
5082        --zSig0;
5083        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5084    }
5085    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5086    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5087        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5088        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5089        while ( (sbits64) rem1 < 0 ) {
5090            --zSig1;
5091            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5092        }
5093        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5094    }
5095    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5096    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5097
5098}
5099
5100/*
5101-------------------------------------------------------------------------------
5102Returns the remainder of the quadruple-precision floating-point value `a'
5103with respect to the corresponding value `b'.  The operation is performed
5104according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5105-------------------------------------------------------------------------------
5106*/
5107float128 float128_rem( float128 a, float128 b )
5108{
5109    flag aSign, bSign, zSign;
5110    int32 aExp, bExp, expDiff;
5111    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5112    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5113    sbits64 sigMean0;
5114    float128 z;
5115
5116    aSig1 = extractFloat128Frac1( a );
5117    aSig0 = extractFloat128Frac0( a );
5118    aExp = extractFloat128Exp( a );
5119    aSign = extractFloat128Sign( a );
5120    bSig1 = extractFloat128Frac1( b );
5121    bSig0 = extractFloat128Frac0( b );
5122    bExp = extractFloat128Exp( b );
5123    bSign = extractFloat128Sign( b );
5124    if ( aExp == 0x7FFF ) {
5125        if (    ( aSig0 | aSig1 )
5126             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5127            return propagateFloat128NaN( a, b );
5128        }
5129        goto invalid;
5130    }
5131    if ( bExp == 0x7FFF ) {
5132        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5133        return a;
5134    }
5135    if ( bExp == 0 ) {
5136        if ( ( bSig0 | bSig1 ) == 0 ) {
5137 invalid:
5138            float_raise( float_flag_invalid );
5139            z.low = float128_default_nan_low;
5140            z.high = float128_default_nan_high;
5141            return z;
5142        }
5143        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5144    }
5145    if ( aExp == 0 ) {
5146        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5147        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5148    }
5149    expDiff = aExp - bExp;
5150    if ( expDiff < -1 ) return a;
5151    shortShift128Left(
5152        aSig0 | LIT64( 0x0001000000000000 ),
5153        aSig1,
5154        15 - ( expDiff < 0 ),
5155        &aSig0,
5156        &aSig1
5157    );
5158    shortShift128Left(
5159        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5160    q = le128( bSig0, bSig1, aSig0, aSig1 );
5161    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5162    expDiff -= 64;
5163    while ( 0 < expDiff ) {
5164        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5165        q = ( 4 < q ) ? q - 4 : 0;
5166        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5167        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5168        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5169        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5170        expDiff -= 61;
5171    }
5172    if ( -64 < expDiff ) {
5173        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5174        q = ( 4 < q ) ? q - 4 : 0;
5175        q >>= - expDiff;
5176        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5177        expDiff += 52;
5178        if ( expDiff < 0 ) {
5179            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5180        }
5181        else {
5182            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5183        }
5184        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5185        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5186    }
5187    else {
5188        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5189        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5190    }
5191    do {
5192        alternateASig0 = aSig0;
5193        alternateASig1 = aSig1;
5194        ++q;
5195        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5196    } while ( 0 <= (sbits64) aSig0 );
5197    add128(
5198        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
5199    if (    ( sigMean0 < 0 )
5200         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5201        aSig0 = alternateASig0;
5202        aSig1 = alternateASig1;
5203    }
5204    zSign = ( (sbits64) aSig0 < 0 );
5205    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5206    return
5207        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
5208
5209}
5210
5211/*
5212-------------------------------------------------------------------------------
5213Returns the square root of the quadruple-precision floating-point value `a'.
5214The operation is performed according to the IEC/IEEE Standard for Binary
5215Floating-Point Arithmetic.
5216-------------------------------------------------------------------------------
5217*/
5218float128 float128_sqrt( float128 a )
5219{
5220    flag aSign;
5221    int32 aExp, zExp;
5222    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5223    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5224    float128 z;
5225
5226    aSig1 = extractFloat128Frac1( a );
5227    aSig0 = extractFloat128Frac0( a );
5228    aExp = extractFloat128Exp( a );
5229    aSign = extractFloat128Sign( a );
5230    if ( aExp == 0x7FFF ) {
5231        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
5232        if ( ! aSign ) return a;
5233        goto invalid;
5234    }
5235    if ( aSign ) {
5236        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5237 invalid:
5238        float_raise( float_flag_invalid );
5239        z.low = float128_default_nan_low;
5240        z.high = float128_default_nan_high;
5241        return z;
5242    }
5243    if ( aExp == 0 ) {
5244        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5245        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5246    }
5247    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5248    aSig0 |= LIT64( 0x0001000000000000 );
5249    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5250    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5251    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5252    doubleZSig0 = zSig0<<1;
5253    mul64To128( zSig0, zSig0, &term0, &term1 );
5254    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5255    while ( (sbits64) rem0 < 0 ) {
5256        --zSig0;
5257        doubleZSig0 -= 2;
5258        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5259    }
5260    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5261    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5262        if ( zSig1 == 0 ) zSig1 = 1;
5263        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5264        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5265        mul64To128( zSig1, zSig1, &term2, &term3 );
5266        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5267        while ( (sbits64) rem1 < 0 ) {
5268            --zSig1;
5269            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5270            term3 |= 1;
5271            term2 |= doubleZSig0;
5272            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5273        }
5274        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5275    }
5276    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5277    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
5278
5279}
5280
5281/*
5282-------------------------------------------------------------------------------
5283Returns 1 if the quadruple-precision floating-point value `a' is equal to
5284the corresponding value `b', and 0 otherwise.  The comparison is performed
5285according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5286-------------------------------------------------------------------------------
5287*/
5288flag float128_eq( float128 a, float128 b )
5289{
5290
5291    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5292              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5293         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5294              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5295       ) {
5296        if (    float128_is_signaling_nan( a )
5297             || float128_is_signaling_nan( b ) ) {
5298            float_raise( float_flag_invalid );
5299        }
5300        return 0;
5301    }
5302    return
5303           ( a.low == b.low )
5304        && (    ( a.high == b.high )
5305             || (    ( a.low == 0 )
5306                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5307           );
5308
5309}
5310
5311/*
5312-------------------------------------------------------------------------------
5313Returns 1 if the quadruple-precision floating-point value `a' is less than
5314or equal to the corresponding value `b', and 0 otherwise.  The comparison
5315is performed according to the IEC/IEEE Standard for Binary Floating-Point
5316Arithmetic.
5317-------------------------------------------------------------------------------
5318*/
5319flag float128_le( float128 a, float128 b )
5320{
5321    flag aSign, bSign;
5322
5323    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5324              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5325         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5326              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5327       ) {
5328        float_raise( float_flag_invalid );
5329        return 0;
5330    }
5331    aSign = extractFloat128Sign( a );
5332    bSign = extractFloat128Sign( b );
5333    if ( aSign != bSign ) {
5334        return
5335               aSign
5336            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5337                 == 0 );
5338    }
5339    return
5340          aSign ? le128( b.high, b.low, a.high, a.low )
5341        : le128( a.high, a.low, b.high, b.low );
5342
5343}
5344
5345/*
5346-------------------------------------------------------------------------------
5347Returns 1 if the quadruple-precision floating-point value `a' is less than
5348the corresponding value `b', and 0 otherwise.  The comparison is performed
5349according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5350-------------------------------------------------------------------------------
5351*/
5352flag float128_lt( float128 a, float128 b )
5353{
5354    flag aSign, bSign;
5355
5356    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5357              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5358         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5359              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5360       ) {
5361        float_raise( float_flag_invalid );
5362        return 0;
5363    }
5364    aSign = extractFloat128Sign( a );
5365    bSign = extractFloat128Sign( b );
5366    if ( aSign != bSign ) {
5367        return
5368               aSign
5369            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5370                 != 0 );
5371    }
5372    return
5373          aSign ? lt128( b.high, b.low, a.high, a.low )
5374        : lt128( a.high, a.low, b.high, b.low );
5375
5376}
5377
5378/*
5379-------------------------------------------------------------------------------
5380Returns 1 if the quadruple-precision floating-point value `a' is equal to
5381the corresponding value `b', and 0 otherwise.  The invalid exception is
5382raised if either operand is a NaN.  Otherwise, the comparison is performed
5383according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5384-------------------------------------------------------------------------------
5385*/
5386flag float128_eq_signaling( float128 a, float128 b )
5387{
5388
5389    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5390              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5391         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5392              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5393       ) {
5394        float_raise( float_flag_invalid );
5395        return 0;
5396    }
5397    return
5398           ( a.low == b.low )
5399        && (    ( a.high == b.high )
5400             || (    ( a.low == 0 )
5401                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5402           );
5403
5404}
5405
5406/*
5407-------------------------------------------------------------------------------
5408Returns 1 if the quadruple-precision floating-point value `a' is less than
5409or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5410cause an exception.  Otherwise, the comparison is performed according to the
5411IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5412-------------------------------------------------------------------------------
5413*/
5414flag float128_le_quiet( float128 a, float128 b )
5415{
5416    flag aSign, bSign;
5417
5418    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5419              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5420         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5421              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5422       ) {
5423        if (    float128_is_signaling_nan( a )
5424             || float128_is_signaling_nan( b ) ) {
5425            float_raise( float_flag_invalid );
5426        }
5427        return 0;
5428    }
5429    aSign = extractFloat128Sign( a );
5430    bSign = extractFloat128Sign( b );
5431    if ( aSign != bSign ) {
5432        return
5433               aSign
5434            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5435                 == 0 );
5436    }
5437    return
5438          aSign ? le128( b.high, b.low, a.high, a.low )
5439        : le128( a.high, a.low, b.high, b.low );
5440
5441}
5442
5443/*
5444-------------------------------------------------------------------------------
5445Returns 1 if the quadruple-precision floating-point value `a' is less than
5446the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5447exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5448Standard for Binary Floating-Point Arithmetic.
5449-------------------------------------------------------------------------------
5450*/
5451flag float128_lt_quiet( float128 a, float128 b )
5452{
5453    flag aSign, bSign;
5454
5455    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5456              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5457         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5458              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5459       ) {
5460        if (    float128_is_signaling_nan( a )
5461             || float128_is_signaling_nan( b ) ) {
5462            float_raise( float_flag_invalid );
5463        }
5464        return 0;
5465    }
5466    aSign = extractFloat128Sign( a );
5467    bSign = extractFloat128Sign( b );
5468    if ( aSign != bSign ) {
5469        return
5470               aSign
5471            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5472                 != 0 );
5473    }
5474    return
5475          aSign ? lt128( b.high, b.low, a.high, a.low )
5476        : lt128( a.high, a.low, b.high, b.low );
5477
5478}
5479
5480#endif
5481
5482
5483#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5484
5485/*
5486 * These two routines are not part of the original softfloat distribution.
5487 *
5488 * They are based on the corresponding conversions to integer but return
5489 * unsigned numbers instead since these functions are required by GCC.
5490 *
5491 * Added by Mark Brinicombe <mark@netbsd.org>	27/09/97
5492 *
5493 * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
5494 */
5495
5496/*
5497-------------------------------------------------------------------------------
5498Returns the result of converting the double-precision floating-point value
5499`a' to the 32-bit unsigned integer format.  The conversion is
5500performed according to the IEC/IEEE Standard for Binary Floating-point
5501Arithmetic, except that the conversion is always rounded toward zero.  If
5502`a' is a NaN, the largest positive integer is returned.  If the conversion
5503overflows, the largest integer positive is returned.
5504-------------------------------------------------------------------------------
5505*/
5506uint32 float64_to_uint32_round_to_zero( float64 a )
5507{
5508    flag aSign;
5509    int16 aExp, shiftCount;
5510    bits64 aSig, savedASig;
5511    uint32 z;
5512
5513    aSig = extractFloat64Frac( a );
5514    aExp = extractFloat64Exp( a );
5515    aSign = extractFloat64Sign( a );
5516
5517    if (aSign) {
5518        float_raise( float_flag_invalid );
5519    	return(0);
5520    }
5521
5522    if ( 0x41E < aExp ) {
5523        float_raise( float_flag_invalid );
5524        return 0xffffffff;
5525    }
5526    else if ( aExp < 0x3FF ) {
5527        if ( aExp || aSig ) float_set_inexact();
5528        return 0;
5529    }
5530    aSig |= LIT64( 0x0010000000000000 );
5531    shiftCount = 0x433 - aExp;
5532    savedASig = aSig;
5533    aSig >>= shiftCount;
5534    z = aSig;
5535    if ( ( aSig<<shiftCount ) != savedASig ) {
5536        float_set_inexact();
5537    }
5538    return z;
5539
5540}
5541
5542/*
5543-------------------------------------------------------------------------------
5544Returns the result of converting the single-precision floating-point value
5545`a' to the 32-bit unsigned integer format.  The conversion is
5546performed according to the IEC/IEEE Standard for Binary Floating-point
5547Arithmetic, except that the conversion is always rounded toward zero.  If
5548`a' is a NaN, the largest positive integer is returned.  If the conversion
5549overflows, the largest positive integer is returned.
5550-------------------------------------------------------------------------------
5551*/
5552uint32 float32_to_uint32_round_to_zero( float32 a )
5553{
5554    flag aSign;
5555    int16 aExp, shiftCount;
5556    bits32 aSig;
5557    uint32 z;
5558
5559    aSig = extractFloat32Frac( a );
5560    aExp = extractFloat32Exp( a );
5561    aSign = extractFloat32Sign( a );
5562    shiftCount = aExp - 0x9E;
5563
5564    if (aSign) {
5565        float_raise( float_flag_invalid );
5566    	return(0);
5567    }
5568    if ( 0 < shiftCount ) {
5569        float_raise( float_flag_invalid );
5570        return 0xFFFFFFFF;
5571    }
5572    else if ( aExp <= 0x7E ) {
5573        if ( aExp | aSig ) float_set_inexact();
5574        return 0;
5575    }
5576    aSig = ( aSig | 0x800000 )<<8;
5577    z = aSig>>( - shiftCount );
5578    if ( aSig<<( shiftCount & 31 ) ) {
5579        float_set_inexact();
5580    }
5581    return z;
5582
5583}
5584
5585#endif
5586