1230363Sdas/* $NetBSD: softfloat.c,v 1.8 2011/07/10 04:52:23 matt Exp $ */
2129203Scognet
3129203Scognet/*
4129203Scognet * This version hacked for use with gcc -msoft-float by bjh21.
5129203Scognet * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6129203Scognet *  itself).
7129203Scognet */
8129203Scognet
9129203Scognet/*
10129203Scognet * Things you may want to define:
11129203Scognet *
12129203Scognet * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13129203Scognet *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14129203Scognet *   properly renamed.
15129203Scognet */
16129203Scognet
17129203Scognet/*
18129203Scognet===============================================================================
19129203Scognet
20129203ScognetThis C source file is part of the SoftFloat IEC/IEEE Floating-point
21129203ScognetArithmetic Package, Release 2a.
22129203Scognet
23129203ScognetWritten by John R. Hauser.  This work was made possible in part by the
24129203ScognetInternational Computer Science Institute, located at Suite 600, 1947 Center
25129203ScognetStreet, Berkeley, California 94704.  Funding was partially provided by the
26129203ScognetNational Science Foundation under grant MIP-9311980.  The original version
27129203Scognetof this code was written as part of a project to build a fixed-point vector
28129203Scognetprocessor in collaboration with the University of California at Berkeley,
29129203Scognetoverseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30129203Scognetis available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31129203Scognetarithmetic/SoftFloat.html'.
32129203Scognet
33129203ScognetTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34129203Scognethas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35129203ScognetTIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36129203ScognetPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37129203ScognetAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38129203Scognet
39129203ScognetDerivative works are acceptable, even for commercial purposes, so long as
40129203Scognet(1) they include prominent notice that the work is derivative, and (2) they
41129203Scognetinclude prominent notice akin to these four paragraphs for those parts of
42129203Scognetthis code that are retained.
43129203Scognet
44129203Scognet===============================================================================
45129203Scognet*/
46129203Scognet
47129203Scognet#include <sys/cdefs.h>
48129203Scognet__FBSDID("$FreeBSD$");
49129203Scognet
50129203Scognet#ifdef SOFTFLOAT_FOR_GCC
51129203Scognet#include "softfloat-for-gcc.h"
52129203Scognet#endif
53129203Scognet
54129203Scognet#include "milieu.h"
55129203Scognet#include "softfloat.h"
56129203Scognet
57129203Scognet/*
58129203Scognet * Conversions between floats as stored in memory and floats as
59129203Scognet * SoftFloat uses them
60129203Scognet */
61129203Scognet#ifndef FLOAT64_DEMANGLE
62129203Scognet#define FLOAT64_DEMANGLE(a)	(a)
63129203Scognet#endif
64129203Scognet#ifndef FLOAT64_MANGLE
65129203Scognet#define FLOAT64_MANGLE(a)	(a)
66129203Scognet#endif
67129203Scognet
68129203Scognet/*
69129203Scognet-------------------------------------------------------------------------------
70129203ScognetFloating-point rounding mode, extended double-precision rounding precision,
71129203Scognetand exception flags.
72129203Scognet-------------------------------------------------------------------------------
73129203Scognet*/
74230189Sdasint float_rounding_mode = float_round_nearest_even;
75230189Sdasint float_exception_flags = 0;
76129203Scognet#ifdef FLOATX80
77129203Scognetint8 floatx80_rounding_precision = 80;
78129203Scognet#endif
79129203Scognet
80129203Scognet/*
81129203Scognet-------------------------------------------------------------------------------
82129203ScognetPrimitive arithmetic functions, including multi-word arithmetic, and
83129203Scognetdivision and square root approximations.  (Can be specialized to target if
84129203Scognetdesired.)
85129203Scognet-------------------------------------------------------------------------------
86129203Scognet*/
87129203Scognet#include "softfloat-macros"
88129203Scognet
89129203Scognet/*
90129203Scognet-------------------------------------------------------------------------------
91129203ScognetFunctions and definitions to determine:  (1) whether tininess for underflow
92129203Scognetis detected before or after rounding by default, (2) what (if anything)
93129203Scognethappens when exceptions are raised, (3) how signaling NaNs are distinguished
94129203Scognetfrom quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
95129203Scognetare propagated from function inputs to output.  These details are target-
96129203Scognetspecific.
97129203Scognet-------------------------------------------------------------------------------
98129203Scognet*/
99129203Scognet#include "softfloat-specialize"
100129203Scognet
101129203Scognet#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
102129203Scognet/*
103129203Scognet-------------------------------------------------------------------------------
104129203ScognetTakes a 64-bit fixed-point value `absZ' with binary point between bits 6
105129203Scognetand 7, and returns the properly rounded 32-bit integer corresponding to the
106129203Scognetinput.  If `zSign' is 1, the input is negated before being converted to an
107129203Scognetinteger.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
108129203Scognetis simply rounded to an integer, with the inexact exception raised if the
109129203Scognetinput cannot be represented exactly as an integer.  However, if the fixed-
110129203Scognetpoint input is too large, the invalid exception is raised and the largest
111129203Scognetpositive or negative integer is returned.
112129203Scognet-------------------------------------------------------------------------------
113129203Scognet*/
114129203Scognetstatic int32 roundAndPackInt32( flag zSign, bits64 absZ )
115129203Scognet{
116129203Scognet    int8 roundingMode;
117129203Scognet    flag roundNearestEven;
118129203Scognet    int8 roundIncrement, roundBits;
119129203Scognet    int32 z;
120129203Scognet
121129203Scognet    roundingMode = float_rounding_mode;
122129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
123129203Scognet    roundIncrement = 0x40;
124129203Scognet    if ( ! roundNearestEven ) {
125129203Scognet        if ( roundingMode == float_round_to_zero ) {
126129203Scognet            roundIncrement = 0;
127129203Scognet        }
128129203Scognet        else {
129129203Scognet            roundIncrement = 0x7F;
130129203Scognet            if ( zSign ) {
131129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
132129203Scognet            }
133129203Scognet            else {
134129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
135129203Scognet            }
136129203Scognet        }
137129203Scognet    }
138129203Scognet    roundBits = absZ & 0x7F;
139129203Scognet    absZ = ( absZ + roundIncrement )>>7;
140129203Scognet    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
141129203Scognet    z = absZ;
142129203Scognet    if ( zSign ) z = - z;
143129203Scognet    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
144129203Scognet        float_raise( float_flag_invalid );
145129203Scognet        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
146129203Scognet    }
147129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
148129203Scognet    return z;
149129203Scognet
150129203Scognet}
151129203Scognet
152129203Scognet/*
153129203Scognet-------------------------------------------------------------------------------
154129203ScognetTakes the 128-bit fixed-point value formed by concatenating `absZ0' and
155129203Scognet`absZ1', with binary point between bits 63 and 64 (between the input words),
156129203Scognetand returns the properly rounded 64-bit integer corresponding to the input.
157129203ScognetIf `zSign' is 1, the input is negated before being converted to an integer.
158129203ScognetOrdinarily, the fixed-point input is simply rounded to an integer, with
159129203Scognetthe inexact exception raised if the input cannot be represented exactly as
160129203Scognetan integer.  However, if the fixed-point input is too large, the invalid
161129203Scognetexception is raised and the largest positive or negative integer is
162129203Scognetreturned.
163129203Scognet-------------------------------------------------------------------------------
164129203Scognet*/
165129203Scognetstatic int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
166129203Scognet{
167129203Scognet    int8 roundingMode;
168129203Scognet    flag roundNearestEven, increment;
169129203Scognet    int64 z;
170129203Scognet
171129203Scognet    roundingMode = float_rounding_mode;
172129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
173129203Scognet    increment = ( (sbits64) absZ1 < 0 );
174129203Scognet    if ( ! roundNearestEven ) {
175129203Scognet        if ( roundingMode == float_round_to_zero ) {
176129203Scognet            increment = 0;
177129203Scognet        }
178129203Scognet        else {
179129203Scognet            if ( zSign ) {
180129203Scognet                increment = ( roundingMode == float_round_down ) && absZ1;
181129203Scognet            }
182129203Scognet            else {
183129203Scognet                increment = ( roundingMode == float_round_up ) && absZ1;
184129203Scognet            }
185129203Scognet        }
186129203Scognet    }
187129203Scognet    if ( increment ) {
188129203Scognet        ++absZ0;
189129203Scognet        if ( absZ0 == 0 ) goto overflow;
190129203Scognet        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
191129203Scognet    }
192129203Scognet    z = absZ0;
193129203Scognet    if ( zSign ) z = - z;
194129203Scognet    if ( z && ( ( z < 0 ) ^ zSign ) ) {
195129203Scognet overflow:
196129203Scognet        float_raise( float_flag_invalid );
197129203Scognet        return
198129203Scognet              zSign ? (sbits64) LIT64( 0x8000000000000000 )
199129203Scognet            : LIT64( 0x7FFFFFFFFFFFFFFF );
200129203Scognet    }
201129203Scognet    if ( absZ1 ) float_exception_flags |= float_flag_inexact;
202129203Scognet    return z;
203129203Scognet
204129203Scognet}
205129203Scognet#endif
206129203Scognet
207129203Scognet/*
208129203Scognet-------------------------------------------------------------------------------
209129203ScognetReturns the fraction bits of the single-precision floating-point value `a'.
210129203Scognet-------------------------------------------------------------------------------
211129203Scognet*/
212129203ScognetINLINE bits32 extractFloat32Frac( float32 a )
213129203Scognet{
214129203Scognet
215129203Scognet    return a & 0x007FFFFF;
216129203Scognet
217129203Scognet}
218129203Scognet
219129203Scognet/*
220129203Scognet-------------------------------------------------------------------------------
221129203ScognetReturns the exponent bits of the single-precision floating-point value `a'.
222129203Scognet-------------------------------------------------------------------------------
223129203Scognet*/
224129203ScognetINLINE int16 extractFloat32Exp( float32 a )
225129203Scognet{
226129203Scognet
227129203Scognet    return ( a>>23 ) & 0xFF;
228129203Scognet
229129203Scognet}
230129203Scognet
231129203Scognet/*
232129203Scognet-------------------------------------------------------------------------------
233129203ScognetReturns the sign bit of the single-precision floating-point value `a'.
234129203Scognet-------------------------------------------------------------------------------
235129203Scognet*/
236129203ScognetINLINE flag extractFloat32Sign( float32 a )
237129203Scognet{
238129203Scognet
239129203Scognet    return a>>31;
240129203Scognet
241129203Scognet}
242129203Scognet
243129203Scognet/*
244129203Scognet-------------------------------------------------------------------------------
245129203ScognetNormalizes the subnormal single-precision floating-point value represented
246129203Scognetby the denormalized significand `aSig'.  The normalized exponent and
247129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and
248129203Scognet`zSigPtr', respectively.
249129203Scognet-------------------------------------------------------------------------------
250129203Scognet*/
251129203Scognetstatic void
252129203Scognet normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
253129203Scognet{
254129203Scognet    int8 shiftCount;
255129203Scognet
256129203Scognet    shiftCount = countLeadingZeros32( aSig ) - 8;
257129203Scognet    *zSigPtr = aSig<<shiftCount;
258129203Scognet    *zExpPtr = 1 - shiftCount;
259129203Scognet
260129203Scognet}
261129203Scognet
262129203Scognet/*
263129203Scognet-------------------------------------------------------------------------------
264129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
265129203Scognetsingle-precision floating-point value, returning the result.  After being
266129203Scognetshifted into the proper positions, the three fields are simply added
267129203Scognettogether to form the result.  This means that any integer portion of `zSig'
268129203Scognetwill be added into the exponent.  Since a properly normalized significand
269129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less
270129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized
271129203Scognetsignificand.
272129203Scognet-------------------------------------------------------------------------------
273129203Scognet*/
274129203ScognetINLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
275129203Scognet{
276129203Scognet
277129203Scognet    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
278129203Scognet
279129203Scognet}
280129203Scognet
281129203Scognet/*
282129203Scognet-------------------------------------------------------------------------------
283129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
284129203Scognetand significand `zSig', and returns the proper single-precision floating-
285129203Scognetpoint value corresponding to the abstract input.  Ordinarily, the abstract
286129203Scognetvalue is simply rounded and packed into the single-precision format, with
287129203Scognetthe inexact exception raised if the abstract input cannot be represented
288129203Scognetexactly.  However, if the abstract value is too large, the overflow and
289129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
290129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
291129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
292129203Scognetthe abstract input cannot be represented exactly as a subnormal single-
293129203Scognetprecision floating-point number.
294129203Scognet    The input significand `zSig' has its binary point between bits 30
295129203Scognetand 29, which is 7 bits to the left of the usual location.  This shifted
296129203Scognetsignificand must be normalized or smaller.  If `zSig' is not normalized,
297129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number,
298129203Scognetand it must not require rounding.  In the usual case that `zSig' is
299129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
300129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for
301129203ScognetBinary Floating-Point Arithmetic.
302129203Scognet-------------------------------------------------------------------------------
303129203Scognet*/
304129203Scognetstatic float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
305129203Scognet{
306129203Scognet    int8 roundingMode;
307129203Scognet    flag roundNearestEven;
308129203Scognet    int8 roundIncrement, roundBits;
309129203Scognet    flag isTiny;
310129203Scognet
311129203Scognet    roundingMode = float_rounding_mode;
312129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
313129203Scognet    roundIncrement = 0x40;
314129203Scognet    if ( ! roundNearestEven ) {
315129203Scognet        if ( roundingMode == float_round_to_zero ) {
316129203Scognet            roundIncrement = 0;
317129203Scognet        }
318129203Scognet        else {
319129203Scognet            roundIncrement = 0x7F;
320129203Scognet            if ( zSign ) {
321129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
322129203Scognet            }
323129203Scognet            else {
324129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
325129203Scognet            }
326129203Scognet        }
327129203Scognet    }
328129203Scognet    roundBits = zSig & 0x7F;
329129203Scognet    if ( 0xFD <= (bits16) zExp ) {
330129203Scognet        if (    ( 0xFD < zExp )
331129203Scognet             || (    ( zExp == 0xFD )
332129203Scognet                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
333129203Scognet           ) {
334129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
335129203Scognet            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
336129203Scognet        }
337129203Scognet        if ( zExp < 0 ) {
338129203Scognet            isTiny =
339129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
340129203Scognet                || ( zExp < -1 )
341129203Scognet                || ( zSig + roundIncrement < 0x80000000 );
342129203Scognet            shift32RightJamming( zSig, - zExp, &zSig );
343129203Scognet            zExp = 0;
344129203Scognet            roundBits = zSig & 0x7F;
345129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
346129203Scognet        }
347129203Scognet    }
348129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
349129203Scognet    zSig = ( zSig + roundIncrement )>>7;
350129203Scognet    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
351129203Scognet    if ( zSig == 0 ) zExp = 0;
352129203Scognet    return packFloat32( zSign, zExp, zSig );
353129203Scognet
354129203Scognet}
355129203Scognet
356129203Scognet/*
357129203Scognet-------------------------------------------------------------------------------
358129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
359129203Scognetand significand `zSig', and returns the proper single-precision floating-
360129203Scognetpoint value corresponding to the abstract input.  This routine is just like
361129203Scognet`roundAndPackFloat32' except that `zSig' does not have to be normalized.
362129203ScognetBit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
363129203Scognetfloating-point exponent.
364129203Scognet-------------------------------------------------------------------------------
365129203Scognet*/
366129203Scognetstatic float32
367129203Scognet normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
368129203Scognet{
369129203Scognet    int8 shiftCount;
370129203Scognet
371129203Scognet    shiftCount = countLeadingZeros32( zSig ) - 1;
372129203Scognet    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
373129203Scognet
374129203Scognet}
375129203Scognet
376129203Scognet/*
377129203Scognet-------------------------------------------------------------------------------
378129203ScognetReturns the fraction bits of the double-precision floating-point value `a'.
379129203Scognet-------------------------------------------------------------------------------
380129203Scognet*/
381129203ScognetINLINE bits64 extractFloat64Frac( float64 a )
382129203Scognet{
383129203Scognet
384129203Scognet    return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
385129203Scognet
386129203Scognet}
387129203Scognet
388129203Scognet/*
389129203Scognet-------------------------------------------------------------------------------
390129203ScognetReturns the exponent bits of the double-precision floating-point value `a'.
391129203Scognet-------------------------------------------------------------------------------
392129203Scognet*/
393129203ScognetINLINE int16 extractFloat64Exp( float64 a )
394129203Scognet{
395129203Scognet
396129203Scognet    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
397129203Scognet
398129203Scognet}
399129203Scognet
400129203Scognet/*
401129203Scognet-------------------------------------------------------------------------------
402129203ScognetReturns the sign bit of the double-precision floating-point value `a'.
403129203Scognet-------------------------------------------------------------------------------
404129203Scognet*/
405129203ScognetINLINE flag extractFloat64Sign( float64 a )
406129203Scognet{
407129203Scognet
408129203Scognet    return FLOAT64_DEMANGLE(a)>>63;
409129203Scognet
410129203Scognet}
411129203Scognet
412129203Scognet/*
413129203Scognet-------------------------------------------------------------------------------
414129203ScognetNormalizes the subnormal double-precision floating-point value represented
415129203Scognetby the denormalized significand `aSig'.  The normalized exponent and
416129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and
417129203Scognet`zSigPtr', respectively.
418129203Scognet-------------------------------------------------------------------------------
419129203Scognet*/
420129203Scognetstatic void
421129203Scognet normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
422129203Scognet{
423129203Scognet    int8 shiftCount;
424129203Scognet
425129203Scognet    shiftCount = countLeadingZeros64( aSig ) - 11;
426129203Scognet    *zSigPtr = aSig<<shiftCount;
427129203Scognet    *zExpPtr = 1 - shiftCount;
428129203Scognet
429129203Scognet}
430129203Scognet
431129203Scognet/*
432129203Scognet-------------------------------------------------------------------------------
433129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
434129203Scognetdouble-precision floating-point value, returning the result.  After being
435129203Scognetshifted into the proper positions, the three fields are simply added
436129203Scognettogether to form the result.  This means that any integer portion of `zSig'
437129203Scognetwill be added into the exponent.  Since a properly normalized significand
438129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less
439129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized
440129203Scognetsignificand.
441129203Scognet-------------------------------------------------------------------------------
442129203Scognet*/
443129203ScognetINLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
444129203Scognet{
445129203Scognet
446129203Scognet    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
447129203Scognet			   ( ( (bits64) zExp )<<52 ) + zSig );
448129203Scognet
449129203Scognet}
450129203Scognet
451129203Scognet/*
452129203Scognet-------------------------------------------------------------------------------
453129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
454129203Scognetand significand `zSig', and returns the proper double-precision floating-
455129203Scognetpoint value corresponding to the abstract input.  Ordinarily, the abstract
456129203Scognetvalue is simply rounded and packed into the double-precision format, with
457129203Scognetthe inexact exception raised if the abstract input cannot be represented
458129203Scognetexactly.  However, if the abstract value is too large, the overflow and
459129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
460129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
461129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
462129203Scognetthe abstract input cannot be represented exactly as a subnormal double-
463129203Scognetprecision floating-point number.
464129203Scognet    The input significand `zSig' has its binary point between bits 62
465129203Scognetand 61, which is 10 bits to the left of the usual location.  This shifted
466129203Scognetsignificand must be normalized or smaller.  If `zSig' is not normalized,
467129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number,
468129203Scognetand it must not require rounding.  In the usual case that `zSig' is
469129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
470129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for
471129203ScognetBinary Floating-Point Arithmetic.
472129203Scognet-------------------------------------------------------------------------------
473129203Scognet*/
474129203Scognetstatic float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
475129203Scognet{
476129203Scognet    int8 roundingMode;
477129203Scognet    flag roundNearestEven;
478129203Scognet    int16 roundIncrement, roundBits;
479129203Scognet    flag isTiny;
480129203Scognet
481129203Scognet    roundingMode = float_rounding_mode;
482129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
483129203Scognet    roundIncrement = 0x200;
484129203Scognet    if ( ! roundNearestEven ) {
485129203Scognet        if ( roundingMode == float_round_to_zero ) {
486129203Scognet            roundIncrement = 0;
487129203Scognet        }
488129203Scognet        else {
489129203Scognet            roundIncrement = 0x3FF;
490129203Scognet            if ( zSign ) {
491129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
492129203Scognet            }
493129203Scognet            else {
494129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
495129203Scognet            }
496129203Scognet        }
497129203Scognet    }
498129203Scognet    roundBits = zSig & 0x3FF;
499129203Scognet    if ( 0x7FD <= (bits16) zExp ) {
500129203Scognet        if (    ( 0x7FD < zExp )
501129203Scognet             || (    ( zExp == 0x7FD )
502129203Scognet                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
503129203Scognet           ) {
504129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
505129203Scognet            return FLOAT64_MANGLE(
506129203Scognet		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
507129203Scognet		( roundIncrement == 0 ));
508129203Scognet        }
509129203Scognet        if ( zExp < 0 ) {
510129203Scognet            isTiny =
511129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
512129203Scognet                || ( zExp < -1 )
513129203Scognet                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
514129203Scognet            shift64RightJamming( zSig, - zExp, &zSig );
515129203Scognet            zExp = 0;
516129203Scognet            roundBits = zSig & 0x3FF;
517129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
518129203Scognet        }
519129203Scognet    }
520129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
521129203Scognet    zSig = ( zSig + roundIncrement )>>10;
522129203Scognet    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
523129203Scognet    if ( zSig == 0 ) zExp = 0;
524129203Scognet    return packFloat64( zSign, zExp, zSig );
525129203Scognet
526129203Scognet}
527129203Scognet
528129203Scognet/*
529129203Scognet-------------------------------------------------------------------------------
530129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
531129203Scognetand significand `zSig', and returns the proper double-precision floating-
532129203Scognetpoint value corresponding to the abstract input.  This routine is just like
533129203Scognet`roundAndPackFloat64' except that `zSig' does not have to be normalized.
534129203ScognetBit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
535129203Scognetfloating-point exponent.
536129203Scognet-------------------------------------------------------------------------------
537129203Scognet*/
538129203Scognetstatic float64
539129203Scognet normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
540129203Scognet{
541129203Scognet    int8 shiftCount;
542129203Scognet
543129203Scognet    shiftCount = countLeadingZeros64( zSig ) - 1;
544129203Scognet    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
545129203Scognet
546129203Scognet}
547129203Scognet
548129203Scognet#ifdef FLOATX80
549129203Scognet
550129203Scognet/*
551129203Scognet-------------------------------------------------------------------------------
552129203ScognetReturns the fraction bits of the extended double-precision floating-point
553129203Scognetvalue `a'.
554129203Scognet-------------------------------------------------------------------------------
555129203Scognet*/
556129203ScognetINLINE bits64 extractFloatx80Frac( floatx80 a )
557129203Scognet{
558129203Scognet
559129203Scognet    return a.low;
560129203Scognet
561129203Scognet}
562129203Scognet
563129203Scognet/*
564129203Scognet-------------------------------------------------------------------------------
565129203ScognetReturns the exponent bits of the extended double-precision floating-point
566129203Scognetvalue `a'.
567129203Scognet-------------------------------------------------------------------------------
568129203Scognet*/
569129203ScognetINLINE int32 extractFloatx80Exp( floatx80 a )
570129203Scognet{
571129203Scognet
572129203Scognet    return a.high & 0x7FFF;
573129203Scognet
574129203Scognet}
575129203Scognet
576129203Scognet/*
577129203Scognet-------------------------------------------------------------------------------
578129203ScognetReturns the sign bit of the extended double-precision floating-point value
579129203Scognet`a'.
580129203Scognet-------------------------------------------------------------------------------
581129203Scognet*/
582129203ScognetINLINE flag extractFloatx80Sign( floatx80 a )
583129203Scognet{
584129203Scognet
585129203Scognet    return a.high>>15;
586129203Scognet
587129203Scognet}
588129203Scognet
589129203Scognet/*
590129203Scognet-------------------------------------------------------------------------------
591129203ScognetNormalizes the subnormal extended double-precision floating-point value
592129203Scognetrepresented by the denormalized significand `aSig'.  The normalized exponent
593129203Scognetand significand are stored at the locations pointed to by `zExpPtr' and
594129203Scognet`zSigPtr', respectively.
595129203Scognet-------------------------------------------------------------------------------
596129203Scognet*/
597129203Scognetstatic void
598129203Scognet normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
599129203Scognet{
600129203Scognet    int8 shiftCount;
601129203Scognet
602129203Scognet    shiftCount = countLeadingZeros64( aSig );
603129203Scognet    *zSigPtr = aSig<<shiftCount;
604129203Scognet    *zExpPtr = 1 - shiftCount;
605129203Scognet
606129203Scognet}
607129203Scognet
608129203Scognet/*
609129203Scognet-------------------------------------------------------------------------------
610129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into an
611129203Scognetextended double-precision floating-point value, returning the result.
612129203Scognet-------------------------------------------------------------------------------
613129203Scognet*/
614129203ScognetINLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
615129203Scognet{
616129203Scognet    floatx80 z;
617129203Scognet
618129203Scognet    z.low = zSig;
619129203Scognet    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
620129203Scognet    return z;
621129203Scognet
622129203Scognet}
623129203Scognet
624129203Scognet/*
625129203Scognet-------------------------------------------------------------------------------
626129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
627129203Scognetand extended significand formed by the concatenation of `zSig0' and `zSig1',
628129203Scognetand returns the proper extended double-precision floating-point value
629129203Scognetcorresponding to the abstract input.  Ordinarily, the abstract value is
630129203Scognetrounded and packed into the extended double-precision format, with the
631129203Scognetinexact exception raised if the abstract input cannot be represented
632129203Scognetexactly.  However, if the abstract value is too large, the overflow and
633129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
634129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
635129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
636129203Scognetthe abstract input cannot be represented exactly as a subnormal extended
637129203Scognetdouble-precision floating-point number.
638129203Scognet    If `roundingPrecision' is 32 or 64, the result is rounded to the same
639129203Scognetnumber of bits as single or double precision, respectively.  Otherwise, the
640129203Scognetresult is rounded to the full precision of the extended double-precision
641129203Scognetformat.
642129203Scognet    The input significand must be normalized or smaller.  If the input
643129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result
644129203Scognetreturned is a subnormal number, and it must not require rounding.  The
645129203Scognethandling of underflow and overflow follows the IEC/IEEE Standard for Binary
646129203ScognetFloating-Point Arithmetic.
647129203Scognet-------------------------------------------------------------------------------
648129203Scognet*/
649129203Scognetstatic floatx80
650129203Scognet roundAndPackFloatx80(
651129203Scognet     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
652129203Scognet )
653129203Scognet{
654129203Scognet    int8 roundingMode;
655129203Scognet    flag roundNearestEven, increment, isTiny;
656129203Scognet    int64 roundIncrement, roundMask, roundBits;
657129203Scognet
658129203Scognet    roundingMode = float_rounding_mode;
659129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
660129203Scognet    if ( roundingPrecision == 80 ) goto precision80;
661129203Scognet    if ( roundingPrecision == 64 ) {
662129203Scognet        roundIncrement = LIT64( 0x0000000000000400 );
663129203Scognet        roundMask = LIT64( 0x00000000000007FF );
664129203Scognet    }
665129203Scognet    else if ( roundingPrecision == 32 ) {
666129203Scognet        roundIncrement = LIT64( 0x0000008000000000 );
667129203Scognet        roundMask = LIT64( 0x000000FFFFFFFFFF );
668129203Scognet    }
669129203Scognet    else {
670129203Scognet        goto precision80;
671129203Scognet    }
672129203Scognet    zSig0 |= ( zSig1 != 0 );
673129203Scognet    if ( ! roundNearestEven ) {
674129203Scognet        if ( roundingMode == float_round_to_zero ) {
675129203Scognet            roundIncrement = 0;
676129203Scognet        }
677129203Scognet        else {
678129203Scognet            roundIncrement = roundMask;
679129203Scognet            if ( zSign ) {
680129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
681129203Scognet            }
682129203Scognet            else {
683129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
684129203Scognet            }
685129203Scognet        }
686129203Scognet    }
687129203Scognet    roundBits = zSig0 & roundMask;
688129203Scognet    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
689129203Scognet        if (    ( 0x7FFE < zExp )
690129203Scognet             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
691129203Scognet           ) {
692129203Scognet            goto overflow;
693129203Scognet        }
694129203Scognet        if ( zExp <= 0 ) {
695129203Scognet            isTiny =
696129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
697129203Scognet                || ( zExp < 0 )
698129203Scognet                || ( zSig0 <= zSig0 + roundIncrement );
699129203Scognet            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
700129203Scognet            zExp = 0;
701129203Scognet            roundBits = zSig0 & roundMask;
702129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
703129203Scognet            if ( roundBits ) float_exception_flags |= float_flag_inexact;
704129203Scognet            zSig0 += roundIncrement;
705129203Scognet            if ( (sbits64) zSig0 < 0 ) zExp = 1;
706129203Scognet            roundIncrement = roundMask + 1;
707129203Scognet            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
708129203Scognet                roundMask |= roundIncrement;
709129203Scognet            }
710129203Scognet            zSig0 &= ~ roundMask;
711129203Scognet            return packFloatx80( zSign, zExp, zSig0 );
712129203Scognet        }
713129203Scognet    }
714129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
715129203Scognet    zSig0 += roundIncrement;
716129203Scognet    if ( zSig0 < roundIncrement ) {
717129203Scognet        ++zExp;
718129203Scognet        zSig0 = LIT64( 0x8000000000000000 );
719129203Scognet    }
720129203Scognet    roundIncrement = roundMask + 1;
721129203Scognet    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
722129203Scognet        roundMask |= roundIncrement;
723129203Scognet    }
724129203Scognet    zSig0 &= ~ roundMask;
725129203Scognet    if ( zSig0 == 0 ) zExp = 0;
726129203Scognet    return packFloatx80( zSign, zExp, zSig0 );
727129203Scognet precision80:
728129203Scognet    increment = ( (sbits64) zSig1 < 0 );
729129203Scognet    if ( ! roundNearestEven ) {
730129203Scognet        if ( roundingMode == float_round_to_zero ) {
731129203Scognet            increment = 0;
732129203Scognet        }
733129203Scognet        else {
734129203Scognet            if ( zSign ) {
735129203Scognet                increment = ( roundingMode == float_round_down ) && zSig1;
736129203Scognet            }
737129203Scognet            else {
738129203Scognet                increment = ( roundingMode == float_round_up ) && zSig1;
739129203Scognet            }
740129203Scognet        }
741129203Scognet    }
742129203Scognet    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
743129203Scognet        if (    ( 0x7FFE < zExp )
744129203Scognet             || (    ( zExp == 0x7FFE )
745129203Scognet                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
746129203Scognet                  && increment
747129203Scognet                )
748129203Scognet           ) {
749129203Scognet            roundMask = 0;
750129203Scognet overflow:
751129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
752129203Scognet            if (    ( roundingMode == float_round_to_zero )
753129203Scognet                 || ( zSign && ( roundingMode == float_round_up ) )
754129203Scognet                 || ( ! zSign && ( roundingMode == float_round_down ) )
755129203Scognet               ) {
756129203Scognet                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
757129203Scognet            }
758129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
759129203Scognet        }
760129203Scognet        if ( zExp <= 0 ) {
761129203Scognet            isTiny =
762129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
763129203Scognet                || ( zExp < 0 )
764129203Scognet                || ! increment
765129203Scognet                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
766129203Scognet            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
767129203Scognet            zExp = 0;
768129203Scognet            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
769129203Scognet            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
770129203Scognet            if ( roundNearestEven ) {
771129203Scognet                increment = ( (sbits64) zSig1 < 0 );
772129203Scognet            }
773129203Scognet            else {
774129203Scognet                if ( zSign ) {
775129203Scognet                    increment = ( roundingMode == float_round_down ) && zSig1;
776129203Scognet                }
777129203Scognet                else {
778129203Scognet                    increment = ( roundingMode == float_round_up ) && zSig1;
779129203Scognet                }
780129203Scognet            }
781129203Scognet            if ( increment ) {
782129203Scognet                ++zSig0;
783129203Scognet                zSig0 &=
784129203Scognet                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
785129203Scognet                if ( (sbits64) zSig0 < 0 ) zExp = 1;
786129203Scognet            }
787129203Scognet            return packFloatx80( zSign, zExp, zSig0 );
788129203Scognet        }
789129203Scognet    }
790129203Scognet    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
791129203Scognet    if ( increment ) {
792129203Scognet        ++zSig0;
793129203Scognet        if ( zSig0 == 0 ) {
794129203Scognet            ++zExp;
795129203Scognet            zSig0 = LIT64( 0x8000000000000000 );
796129203Scognet        }
797129203Scognet        else {
798129203Scognet            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
799129203Scognet        }
800129203Scognet    }
801129203Scognet    else {
802129203Scognet        if ( zSig0 == 0 ) zExp = 0;
803129203Scognet    }
804129203Scognet    return packFloatx80( zSign, zExp, zSig0 );
805129203Scognet
806129203Scognet}
807129203Scognet
808129203Scognet/*
809129203Scognet-------------------------------------------------------------------------------
810129203ScognetTakes an abstract floating-point value having sign `zSign', exponent
811129203Scognet`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
812129203Scognetand returns the proper extended double-precision floating-point value
813129203Scognetcorresponding to the abstract input.  This routine is just like
814129203Scognet`roundAndPackFloatx80' except that the input significand does not have to be
815129203Scognetnormalized.
816129203Scognet-------------------------------------------------------------------------------
817129203Scognet*/
818129203Scognetstatic floatx80
819129203Scognet normalizeRoundAndPackFloatx80(
820129203Scognet     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
821129203Scognet )
822129203Scognet{
823129203Scognet    int8 shiftCount;
824129203Scognet
825129203Scognet    if ( zSig0 == 0 ) {
826129203Scognet        zSig0 = zSig1;
827129203Scognet        zSig1 = 0;
828129203Scognet        zExp -= 64;
829129203Scognet    }
830129203Scognet    shiftCount = countLeadingZeros64( zSig0 );
831129203Scognet    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
832129203Scognet    zExp -= shiftCount;
833129203Scognet    return
834129203Scognet        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
835129203Scognet
836129203Scognet}
837129203Scognet
838129203Scognet#endif
839129203Scognet
840129203Scognet#ifdef FLOAT128
841129203Scognet
842129203Scognet/*
843129203Scognet-------------------------------------------------------------------------------
844129203ScognetReturns the least-significant 64 fraction bits of the quadruple-precision
845129203Scognetfloating-point value `a'.
846129203Scognet-------------------------------------------------------------------------------
847129203Scognet*/
848129203ScognetINLINE bits64 extractFloat128Frac1( float128 a )
849129203Scognet{
850129203Scognet
851129203Scognet    return a.low;
852129203Scognet
853129203Scognet}
854129203Scognet
855129203Scognet/*
856129203Scognet-------------------------------------------------------------------------------
857129203ScognetReturns the most-significant 48 fraction bits of the quadruple-precision
858129203Scognetfloating-point value `a'.
859129203Scognet-------------------------------------------------------------------------------
860129203Scognet*/
861129203ScognetINLINE bits64 extractFloat128Frac0( float128 a )
862129203Scognet{
863129203Scognet
864129203Scognet    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
865129203Scognet
866129203Scognet}
867129203Scognet
868129203Scognet/*
869129203Scognet-------------------------------------------------------------------------------
870129203ScognetReturns the exponent bits of the quadruple-precision floating-point value
871129203Scognet`a'.
872129203Scognet-------------------------------------------------------------------------------
873129203Scognet*/
874129203ScognetINLINE int32 extractFloat128Exp( float128 a )
875129203Scognet{
876129203Scognet
877129203Scognet    return ( a.high>>48 ) & 0x7FFF;
878129203Scognet
879129203Scognet}
880129203Scognet
881129203Scognet/*
882129203Scognet-------------------------------------------------------------------------------
883129203ScognetReturns the sign bit of the quadruple-precision floating-point value `a'.
884129203Scognet-------------------------------------------------------------------------------
885129203Scognet*/
886129203ScognetINLINE flag extractFloat128Sign( float128 a )
887129203Scognet{
888129203Scognet
889129203Scognet    return a.high>>63;
890129203Scognet
891129203Scognet}
892129203Scognet
893129203Scognet/*
894129203Scognet-------------------------------------------------------------------------------
895129203ScognetNormalizes the subnormal quadruple-precision floating-point value
896129203Scognetrepresented by the denormalized significand formed by the concatenation of
897129203Scognet`aSig0' and `aSig1'.  The normalized exponent is stored at the location
898129203Scognetpointed to by `zExpPtr'.  The most significant 49 bits of the normalized
899129203Scognetsignificand are stored at the location pointed to by `zSig0Ptr', and the
900129203Scognetleast significant 64 bits of the normalized significand are stored at the
901129203Scognetlocation pointed to by `zSig1Ptr'.
902129203Scognet-------------------------------------------------------------------------------
903129203Scognet*/
904129203Scognetstatic void
905129203Scognet normalizeFloat128Subnormal(
906129203Scognet     bits64 aSig0,
907129203Scognet     bits64 aSig1,
908129203Scognet     int32 *zExpPtr,
909129203Scognet     bits64 *zSig0Ptr,
910129203Scognet     bits64 *zSig1Ptr
911129203Scognet )
912129203Scognet{
913129203Scognet    int8 shiftCount;
914129203Scognet
915129203Scognet    if ( aSig0 == 0 ) {
916129203Scognet        shiftCount = countLeadingZeros64( aSig1 ) - 15;
917129203Scognet        if ( shiftCount < 0 ) {
918129203Scognet            *zSig0Ptr = aSig1>>( - shiftCount );
919129203Scognet            *zSig1Ptr = aSig1<<( shiftCount & 63 );
920129203Scognet        }
921129203Scognet        else {
922129203Scognet            *zSig0Ptr = aSig1<<shiftCount;
923129203Scognet            *zSig1Ptr = 0;
924129203Scognet        }
925129203Scognet        *zExpPtr = - shiftCount - 63;
926129203Scognet    }
927129203Scognet    else {
928129203Scognet        shiftCount = countLeadingZeros64( aSig0 ) - 15;
929129203Scognet        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
930129203Scognet        *zExpPtr = 1 - shiftCount;
931129203Scognet    }
932129203Scognet
933129203Scognet}
934129203Scognet
935129203Scognet/*
936129203Scognet-------------------------------------------------------------------------------
937129203ScognetPacks the sign `zSign', the exponent `zExp', and the significand formed
938129203Scognetby the concatenation of `zSig0' and `zSig1' into a quadruple-precision
939129203Scognetfloating-point value, returning the result.  After being shifted into the
940129203Scognetproper positions, the three fields `zSign', `zExp', and `zSig0' are simply
941129203Scognetadded together to form the most significant 32 bits of the result.  This
942129203Scognetmeans that any integer portion of `zSig0' will be added into the exponent.
943129203ScognetSince a properly normalized significand will have an integer portion equal
944129203Scognetto 1, the `zExp' input should be 1 less than the desired result exponent
945129203Scognetwhenever `zSig0' and `zSig1' concatenated form a complete, normalized
946129203Scognetsignificand.
947129203Scognet-------------------------------------------------------------------------------
948129203Scognet*/
949129203ScognetINLINE float128
950129203Scognet packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
951129203Scognet{
952129203Scognet    float128 z;
953129203Scognet
954129203Scognet    z.low = zSig1;
955129203Scognet    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
956129203Scognet    return z;
957129203Scognet
958129203Scognet}
959129203Scognet
960129203Scognet/*
961129203Scognet-------------------------------------------------------------------------------
962129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
963129203Scognetand extended significand formed by the concatenation of `zSig0', `zSig1',
964129203Scognetand `zSig2', and returns the proper quadruple-precision floating-point value
965129203Scognetcorresponding to the abstract input.  Ordinarily, the abstract value is
966129203Scognetsimply rounded and packed into the quadruple-precision format, with the
967129203Scognetinexact exception raised if the abstract input cannot be represented
968129203Scognetexactly.  However, if the abstract value is too large, the overflow and
969129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
970129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
971129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
972129203Scognetthe abstract input cannot be represented exactly as a subnormal quadruple-
973129203Scognetprecision floating-point number.
974129203Scognet    The input significand must be normalized or smaller.  If the input
975129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result
976129203Scognetreturned is a subnormal number, and it must not require rounding.  In the
977129203Scognetusual case that the input significand is normalized, `zExp' must be 1 less
978129203Scognetthan the ``true'' floating-point exponent.  The handling of underflow and
979129203Scognetoverflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
980129203Scognet-------------------------------------------------------------------------------
981129203Scognet*/
982129203Scognetstatic float128
983129203Scognet roundAndPackFloat128(
984129203Scognet     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
985129203Scognet{
986129203Scognet    int8 roundingMode;
987129203Scognet    flag roundNearestEven, increment, isTiny;
988129203Scognet
989129203Scognet    roundingMode = float_rounding_mode;
990129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
991129203Scognet    increment = ( (sbits64) zSig2 < 0 );
992129203Scognet    if ( ! roundNearestEven ) {
993129203Scognet        if ( roundingMode == float_round_to_zero ) {
994129203Scognet            increment = 0;
995129203Scognet        }
996129203Scognet        else {
997129203Scognet            if ( zSign ) {
998129203Scognet                increment = ( roundingMode == float_round_down ) && zSig2;
999129203Scognet            }
1000129203Scognet            else {
1001129203Scognet                increment = ( roundingMode == float_round_up ) && zSig2;
1002129203Scognet            }
1003129203Scognet        }
1004129203Scognet    }
1005129203Scognet    if ( 0x7FFD <= (bits32) zExp ) {
1006129203Scognet        if (    ( 0x7FFD < zExp )
1007129203Scognet             || (    ( zExp == 0x7FFD )
1008129203Scognet                  && eq128(
1009129203Scognet                         LIT64( 0x0001FFFFFFFFFFFF ),
1010129203Scognet                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1011129203Scognet                         zSig0,
1012129203Scognet                         zSig1
1013129203Scognet                     )
1014129203Scognet                  && increment
1015129203Scognet                )
1016129203Scognet           ) {
1017129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
1018129203Scognet            if (    ( roundingMode == float_round_to_zero )
1019129203Scognet                 || ( zSign && ( roundingMode == float_round_up ) )
1020129203Scognet                 || ( ! zSign && ( roundingMode == float_round_down ) )
1021129203Scognet               ) {
1022129203Scognet                return
1023129203Scognet                    packFloat128(
1024129203Scognet                        zSign,
1025129203Scognet                        0x7FFE,
1026129203Scognet                        LIT64( 0x0000FFFFFFFFFFFF ),
1027129203Scognet                        LIT64( 0xFFFFFFFFFFFFFFFF )
1028129203Scognet                    );
1029129203Scognet            }
1030129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
1031129203Scognet        }
1032129203Scognet        if ( zExp < 0 ) {
1033129203Scognet            isTiny =
1034129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
1035129203Scognet                || ( zExp < -1 )
1036129203Scognet                || ! increment
1037129203Scognet                || lt128(
1038129203Scognet                       zSig0,
1039129203Scognet                       zSig1,
1040129203Scognet                       LIT64( 0x0001FFFFFFFFFFFF ),
1041129203Scognet                       LIT64( 0xFFFFFFFFFFFFFFFF )
1042129203Scognet                   );
1043129203Scognet            shift128ExtraRightJamming(
1044129203Scognet                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1045129203Scognet            zExp = 0;
1046129203Scognet            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1047129203Scognet            if ( roundNearestEven ) {
1048129203Scognet                increment = ( (sbits64) zSig2 < 0 );
1049129203Scognet            }
1050129203Scognet            else {
1051129203Scognet                if ( zSign ) {
1052129203Scognet                    increment = ( roundingMode == float_round_down ) && zSig2;
1053129203Scognet                }
1054129203Scognet                else {
1055129203Scognet                    increment = ( roundingMode == float_round_up ) && zSig2;
1056129203Scognet                }
1057129203Scognet            }
1058129203Scognet        }
1059129203Scognet    }
1060129203Scognet    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
1061129203Scognet    if ( increment ) {
1062129203Scognet        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1063129203Scognet        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1064129203Scognet    }
1065129203Scognet    else {
1066129203Scognet        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1067129203Scognet    }
1068129203Scognet    return packFloat128( zSign, zExp, zSig0, zSig1 );
1069129203Scognet
1070129203Scognet}
1071129203Scognet
1072129203Scognet/*
1073129203Scognet-------------------------------------------------------------------------------
1074129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
1075129203Scognetand significand formed by the concatenation of `zSig0' and `zSig1', and
1076129203Scognetreturns the proper quadruple-precision floating-point value corresponding
1077129203Scognetto the abstract input.  This routine is just like `roundAndPackFloat128'
1078129203Scognetexcept that the input significand has fewer bits and does not have to be
1079129203Scognetnormalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1080129203Scognetpoint exponent.
1081129203Scognet-------------------------------------------------------------------------------
1082129203Scognet*/
1083129203Scognetstatic float128
1084129203Scognet normalizeRoundAndPackFloat128(
1085129203Scognet     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1086129203Scognet{
1087129203Scognet    int8 shiftCount;
1088129203Scognet    bits64 zSig2;
1089129203Scognet
1090129203Scognet    if ( zSig0 == 0 ) {
1091129203Scognet        zSig0 = zSig1;
1092129203Scognet        zSig1 = 0;
1093129203Scognet        zExp -= 64;
1094129203Scognet    }
1095129203Scognet    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1096129203Scognet    if ( 0 <= shiftCount ) {
1097129203Scognet        zSig2 = 0;
1098129203Scognet        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1099129203Scognet    }
1100129203Scognet    else {
1101129203Scognet        shift128ExtraRightJamming(
1102129203Scognet            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1103129203Scognet    }
1104129203Scognet    zExp -= shiftCount;
1105129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1106129203Scognet
1107129203Scognet}
1108129203Scognet
1109129203Scognet#endif
1110129203Scognet
1111129203Scognet/*
1112129203Scognet-------------------------------------------------------------------------------
1113129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
1114129203Scognetto the single-precision floating-point format.  The conversion is performed
1115129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1116129203Scognet-------------------------------------------------------------------------------
1117129203Scognet*/
1118129203Scognetfloat32 int32_to_float32( int32 a )
1119129203Scognet{
1120129203Scognet    flag zSign;
1121129203Scognet
1122129203Scognet    if ( a == 0 ) return 0;
1123129203Scognet    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1124129203Scognet    zSign = ( a < 0 );
1125129203Scognet    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
1126129203Scognet
1127129203Scognet}
1128129203Scognet
1129230380Sdas#ifndef SOFTFLOAT_FOR_GCC /* __floatunsisf is in libgcc */
1130230363Sdasfloat32 uint32_to_float32( uint32 a )
1131230363Sdas{
1132230363Sdas    if ( a == 0 ) return 0;
1133230363Sdas    if ( a & (bits32) 0x80000000 )
1134230363Sdas	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
1135230363Sdas    return normalizeRoundAndPackFloat32( 0, 0x9C, a );
1136230363Sdas}
1137230380Sdas#endif
1138230363Sdas
1139230363Sdas
1140129203Scognet/*
1141129203Scognet-------------------------------------------------------------------------------
1142129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
1143129203Scognetto the double-precision floating-point format.  The conversion is performed
1144129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1145129203Scognet-------------------------------------------------------------------------------
1146129203Scognet*/
1147129203Scognetfloat64 int32_to_float64( int32 a )
1148129203Scognet{
1149129203Scognet    flag zSign;
1150129203Scognet    uint32 absA;
1151129203Scognet    int8 shiftCount;
1152129203Scognet    bits64 zSig;
1153129203Scognet
1154129203Scognet    if ( a == 0 ) return 0;
1155129203Scognet    zSign = ( a < 0 );
1156129203Scognet    absA = zSign ? - a : a;
1157129203Scognet    shiftCount = countLeadingZeros32( absA ) + 21;
1158129203Scognet    zSig = absA;
1159129203Scognet    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1160129203Scognet
1161129203Scognet}
1162129203Scognet
1163230380Sdas#ifndef SOFTFLOAT_FOR_GCC /* __floatunsidf is in libgcc */
1164230363Sdasfloat64 uint32_to_float64( uint32 a )
1165230363Sdas{
1166230363Sdas    int8 shiftCount;
1167230363Sdas    bits64 zSig = a;
1168230363Sdas
1169230363Sdas    if ( a == 0 ) return 0;
1170230363Sdas    shiftCount = countLeadingZeros32( a ) + 21;
1171230363Sdas    return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
1172230363Sdas
1173230363Sdas}
1174230380Sdas#endif
1175230363Sdas
1176129203Scognet#ifdef FLOATX80
1177129203Scognet
1178129203Scognet/*
1179129203Scognet-------------------------------------------------------------------------------
1180129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
1181129203Scognetto the extended double-precision floating-point format.  The conversion
1182129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
1183129203ScognetArithmetic.
1184129203Scognet-------------------------------------------------------------------------------
1185129203Scognet*/
1186129203Scognetfloatx80 int32_to_floatx80( int32 a )
1187129203Scognet{
1188129203Scognet    flag zSign;
1189129203Scognet    uint32 absA;
1190129203Scognet    int8 shiftCount;
1191129203Scognet    bits64 zSig;
1192129203Scognet
1193129203Scognet    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1194129203Scognet    zSign = ( a < 0 );
1195129203Scognet    absA = zSign ? - a : a;
1196129203Scognet    shiftCount = countLeadingZeros32( absA ) + 32;
1197129203Scognet    zSig = absA;
1198129203Scognet    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1199129203Scognet
1200129203Scognet}
1201129203Scognet
1202230363Sdasfloatx80 uint32_to_floatx80( uint32 a )
1203230363Sdas{
1204230363Sdas    int8 shiftCount;
1205230363Sdas    bits64 zSig = a;
1206230363Sdas
1207230363Sdas    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1208230363Sdas    shiftCount = countLeadingZeros32( a ) + 32;
1209230363Sdas    return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
1210230363Sdas
1211230363Sdas}
1212230363Sdas
1213129203Scognet#endif
1214129203Scognet
1215129203Scognet#ifdef FLOAT128
1216129203Scognet
1217129203Scognet/*
1218129203Scognet-------------------------------------------------------------------------------
1219129203ScognetReturns the result of converting the 32-bit two's complement integer `a' to
1220129203Scognetthe quadruple-precision floating-point format.  The conversion is performed
1221129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1222129203Scognet-------------------------------------------------------------------------------
1223129203Scognet*/
1224129203Scognetfloat128 int32_to_float128( int32 a )
1225129203Scognet{
1226129203Scognet    flag zSign;
1227129203Scognet    uint32 absA;
1228129203Scognet    int8 shiftCount;
1229129203Scognet    bits64 zSig0;
1230129203Scognet
1231129203Scognet    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1232129203Scognet    zSign = ( a < 0 );
1233129203Scognet    absA = zSign ? - a : a;
1234129203Scognet    shiftCount = countLeadingZeros32( absA ) + 17;
1235129203Scognet    zSig0 = absA;
1236129203Scognet    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1237129203Scognet
1238129203Scognet}
1239129203Scognet
1240230363Sdasfloat128 uint32_to_float128( uint32 a )
1241230363Sdas{
1242230363Sdas    int8 shiftCount;
1243230363Sdas    bits64 zSig0 = a;
1244230363Sdas
1245230363Sdas    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1246230363Sdas    shiftCount = countLeadingZeros32( a ) + 17;
1247230363Sdas    return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1248230363Sdas
1249230363Sdas}
1250230363Sdas
1251129203Scognet#endif
1252129203Scognet
1253129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1254129203Scognet/*
1255129203Scognet-------------------------------------------------------------------------------
1256129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
1257129203Scognetto the single-precision floating-point format.  The conversion is performed
1258129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259129203Scognet-------------------------------------------------------------------------------
1260129203Scognet*/
1261129203Scognetfloat32 int64_to_float32( int64 a )
1262129203Scognet{
1263129203Scognet    flag zSign;
1264129203Scognet    uint64 absA;
1265129203Scognet    int8 shiftCount;
1266129203Scognet
1267129203Scognet    if ( a == 0 ) return 0;
1268129203Scognet    zSign = ( a < 0 );
1269129203Scognet    absA = zSign ? - a : a;
1270129203Scognet    shiftCount = countLeadingZeros64( absA ) - 40;
1271129203Scognet    if ( 0 <= shiftCount ) {
1272129203Scognet        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1273129203Scognet    }
1274129203Scognet    else {
1275129203Scognet        shiftCount += 7;
1276129203Scognet        if ( shiftCount < 0 ) {
1277129203Scognet            shift64RightJamming( absA, - shiftCount, &absA );
1278129203Scognet        }
1279129203Scognet        else {
1280129203Scognet            absA <<= shiftCount;
1281129203Scognet        }
1282129203Scognet        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1283129203Scognet    }
1284129203Scognet
1285129203Scognet}
1286129203Scognet
1287129203Scognet/*
1288129203Scognet-------------------------------------------------------------------------------
1289129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
1290129203Scognetto the double-precision floating-point format.  The conversion is performed
1291129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1292129203Scognet-------------------------------------------------------------------------------
1293129203Scognet*/
1294129203Scognetfloat64 int64_to_float64( int64 a )
1295129203Scognet{
1296129203Scognet    flag zSign;
1297129203Scognet
1298129203Scognet    if ( a == 0 ) return 0;
1299129203Scognet    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1300129203Scognet        return packFloat64( 1, 0x43E, 0 );
1301129203Scognet    }
1302129203Scognet    zSign = ( a < 0 );
1303129203Scognet    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1304129203Scognet
1305129203Scognet}
1306129203Scognet
1307129203Scognet#ifdef FLOATX80
1308129203Scognet
1309129203Scognet/*
1310129203Scognet-------------------------------------------------------------------------------
1311129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
1312129203Scognetto the extended double-precision floating-point format.  The conversion
1313129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
1314129203ScognetArithmetic.
1315129203Scognet-------------------------------------------------------------------------------
1316129203Scognet*/
1317129203Scognetfloatx80 int64_to_floatx80( int64 a )
1318129203Scognet{
1319129203Scognet    flag zSign;
1320129203Scognet    uint64 absA;
1321129203Scognet    int8 shiftCount;
1322129203Scognet
1323129203Scognet    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1324129203Scognet    zSign = ( a < 0 );
1325129203Scognet    absA = zSign ? - a : a;
1326129203Scognet    shiftCount = countLeadingZeros64( absA );
1327129203Scognet    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1328129203Scognet
1329129203Scognet}
1330129203Scognet
1331129203Scognet#endif
1332129203Scognet
1333129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1334129203Scognet
1335129203Scognet#ifdef FLOAT128
1336129203Scognet
1337129203Scognet/*
1338129203Scognet-------------------------------------------------------------------------------
1339129203ScognetReturns the result of converting the 64-bit two's complement integer `a' to
1340129203Scognetthe quadruple-precision floating-point format.  The conversion is performed
1341129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1342129203Scognet-------------------------------------------------------------------------------
1343129203Scognet*/
1344129203Scognetfloat128 int64_to_float128( int64 a )
1345129203Scognet{
1346129203Scognet    flag zSign;
1347129203Scognet    uint64 absA;
1348129203Scognet    int8 shiftCount;
1349129203Scognet    int32 zExp;
1350129203Scognet    bits64 zSig0, zSig1;
1351129203Scognet
1352129203Scognet    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1353129203Scognet    zSign = ( a < 0 );
1354129203Scognet    absA = zSign ? - a : a;
1355129203Scognet    shiftCount = countLeadingZeros64( absA ) + 49;
1356129203Scognet    zExp = 0x406E - shiftCount;
1357129203Scognet    if ( 64 <= shiftCount ) {
1358129203Scognet        zSig1 = 0;
1359129203Scognet        zSig0 = absA;
1360129203Scognet        shiftCount -= 64;
1361129203Scognet    }
1362129203Scognet    else {
1363129203Scognet        zSig1 = absA;
1364129203Scognet        zSig0 = 0;
1365129203Scognet    }
1366129203Scognet    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1367129203Scognet    return packFloat128( zSign, zExp, zSig0, zSig1 );
1368129203Scognet
1369129203Scognet}
1370129203Scognet
1371129203Scognet#endif
1372129203Scognet
1373129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1374129203Scognet/*
1375129203Scognet-------------------------------------------------------------------------------
1376129203ScognetReturns the result of converting the single-precision floating-point value
1377129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
1378129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1379129203ScognetArithmetic---which means in particular that the conversion is rounded
1380129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
1381129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
1382129203Scognetlargest integer with the same sign as `a' is returned.
1383129203Scognet-------------------------------------------------------------------------------
1384129203Scognet*/
1385129203Scognetint32 float32_to_int32( float32 a )
1386129203Scognet{
1387129203Scognet    flag aSign;
1388129203Scognet    int16 aExp, shiftCount;
1389129203Scognet    bits32 aSig;
1390129203Scognet    bits64 aSig64;
1391129203Scognet
1392129203Scognet    aSig = extractFloat32Frac( a );
1393129203Scognet    aExp = extractFloat32Exp( a );
1394129203Scognet    aSign = extractFloat32Sign( a );
1395129203Scognet    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1396129203Scognet    if ( aExp ) aSig |= 0x00800000;
1397129203Scognet    shiftCount = 0xAF - aExp;
1398129203Scognet    aSig64 = aSig;
1399129203Scognet    aSig64 <<= 32;
1400129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1401129203Scognet    return roundAndPackInt32( aSign, aSig64 );
1402129203Scognet
1403129203Scognet}
1404129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1405129203Scognet
1406129203Scognet/*
1407129203Scognet-------------------------------------------------------------------------------
1408129203ScognetReturns the result of converting the single-precision floating-point value
1409129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
1410129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1411129203ScognetArithmetic, except that the conversion is always rounded toward zero.
1412129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1413129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
1414129203Scognetreturned.
1415129203Scognet-------------------------------------------------------------------------------
1416129203Scognet*/
1417129203Scognetint32 float32_to_int32_round_to_zero( float32 a )
1418129203Scognet{
1419129203Scognet    flag aSign;
1420129203Scognet    int16 aExp, shiftCount;
1421129203Scognet    bits32 aSig;
1422129203Scognet    int32 z;
1423129203Scognet
1424129203Scognet    aSig = extractFloat32Frac( a );
1425129203Scognet    aExp = extractFloat32Exp( a );
1426129203Scognet    aSign = extractFloat32Sign( a );
1427129203Scognet    shiftCount = aExp - 0x9E;
1428129203Scognet    if ( 0 <= shiftCount ) {
1429129203Scognet        if ( a != 0xCF000000 ) {
1430129203Scognet            float_raise( float_flag_invalid );
1431129203Scognet            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1432129203Scognet        }
1433129203Scognet        return (sbits32) 0x80000000;
1434129203Scognet    }
1435129203Scognet    else if ( aExp <= 0x7E ) {
1436129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
1437129203Scognet        return 0;
1438129203Scognet    }
1439129203Scognet    aSig = ( aSig | 0x00800000 )<<8;
1440129203Scognet    z = aSig>>( - shiftCount );
1441129203Scognet    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1442129203Scognet        float_exception_flags |= float_flag_inexact;
1443129203Scognet    }
1444129203Scognet    if ( aSign ) z = - z;
1445129203Scognet    return z;
1446129203Scognet
1447129203Scognet}
1448129203Scognet
1449129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1450129203Scognet/*
1451129203Scognet-------------------------------------------------------------------------------
1452129203ScognetReturns the result of converting the single-precision floating-point value
1453129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
1454129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1455129203ScognetArithmetic---which means in particular that the conversion is rounded
1456129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
1457129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
1458129203Scognetlargest integer with the same sign as `a' is returned.
1459129203Scognet-------------------------------------------------------------------------------
1460129203Scognet*/
1461129203Scognetint64 float32_to_int64( float32 a )
1462129203Scognet{
1463129203Scognet    flag aSign;
1464129203Scognet    int16 aExp, shiftCount;
1465129203Scognet    bits32 aSig;
1466129203Scognet    bits64 aSig64, aSigExtra;
1467129203Scognet
1468129203Scognet    aSig = extractFloat32Frac( a );
1469129203Scognet    aExp = extractFloat32Exp( a );
1470129203Scognet    aSign = extractFloat32Sign( a );
1471129203Scognet    shiftCount = 0xBE - aExp;
1472129203Scognet    if ( shiftCount < 0 ) {
1473129203Scognet        float_raise( float_flag_invalid );
1474129203Scognet        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1475129203Scognet            return LIT64( 0x7FFFFFFFFFFFFFFF );
1476129203Scognet        }
1477129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
1478129203Scognet    }
1479129203Scognet    if ( aExp ) aSig |= 0x00800000;
1480129203Scognet    aSig64 = aSig;
1481129203Scognet    aSig64 <<= 40;
1482129203Scognet    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1483129203Scognet    return roundAndPackInt64( aSign, aSig64, aSigExtra );
1484129203Scognet
1485129203Scognet}
1486129203Scognet
1487129203Scognet/*
1488129203Scognet-------------------------------------------------------------------------------
1489129203ScognetReturns the result of converting the single-precision floating-point value
1490129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
1491129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1492129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
1493129203Scognet`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1494129203Scognetconversion overflows, the largest integer with the same sign as `a' is
1495129203Scognetreturned.
1496129203Scognet-------------------------------------------------------------------------------
1497129203Scognet*/
1498129203Scognetint64 float32_to_int64_round_to_zero( float32 a )
1499129203Scognet{
1500129203Scognet    flag aSign;
1501129203Scognet    int16 aExp, shiftCount;
1502129203Scognet    bits32 aSig;
1503129203Scognet    bits64 aSig64;
1504129203Scognet    int64 z;
1505129203Scognet
1506129203Scognet    aSig = extractFloat32Frac( a );
1507129203Scognet    aExp = extractFloat32Exp( a );
1508129203Scognet    aSign = extractFloat32Sign( a );
1509129203Scognet    shiftCount = aExp - 0xBE;
1510129203Scognet    if ( 0 <= shiftCount ) {
1511129203Scognet        if ( a != 0xDF000000 ) {
1512129203Scognet            float_raise( float_flag_invalid );
1513129203Scognet            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1514129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
1515129203Scognet            }
1516129203Scognet        }
1517129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
1518129203Scognet    }
1519129203Scognet    else if ( aExp <= 0x7E ) {
1520129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
1521129203Scognet        return 0;
1522129203Scognet    }
1523129203Scognet    aSig64 = aSig | 0x00800000;
1524129203Scognet    aSig64 <<= 40;
1525129203Scognet    z = aSig64>>( - shiftCount );
1526129203Scognet    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1527129203Scognet        float_exception_flags |= float_flag_inexact;
1528129203Scognet    }
1529129203Scognet    if ( aSign ) z = - z;
1530129203Scognet    return z;
1531129203Scognet
1532129203Scognet}
1533129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1534129203Scognet
1535129203Scognet/*
1536129203Scognet-------------------------------------------------------------------------------
1537129203ScognetReturns the result of converting the single-precision floating-point value
1538129203Scognet`a' to the double-precision floating-point format.  The conversion is
1539129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1540129203ScognetArithmetic.
1541129203Scognet-------------------------------------------------------------------------------
1542129203Scognet*/
1543129203Scognetfloat64 float32_to_float64( float32 a )
1544129203Scognet{
1545129203Scognet    flag aSign;
1546129203Scognet    int16 aExp;
1547129203Scognet    bits32 aSig;
1548129203Scognet
1549129203Scognet    aSig = extractFloat32Frac( a );
1550129203Scognet    aExp = extractFloat32Exp( a );
1551129203Scognet    aSign = extractFloat32Sign( a );
1552129203Scognet    if ( aExp == 0xFF ) {
1553129203Scognet        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1554129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
1555129203Scognet    }
1556129203Scognet    if ( aExp == 0 ) {
1557129203Scognet        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1558129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1559129203Scognet        --aExp;
1560129203Scognet    }
1561129203Scognet    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1562129203Scognet
1563129203Scognet}
1564129203Scognet
1565129203Scognet#ifdef FLOATX80
1566129203Scognet
1567129203Scognet/*
1568129203Scognet-------------------------------------------------------------------------------
1569129203ScognetReturns the result of converting the single-precision floating-point value
1570129203Scognet`a' to the extended double-precision floating-point format.  The conversion
1571129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
1572129203ScognetArithmetic.
1573129203Scognet-------------------------------------------------------------------------------
1574129203Scognet*/
1575129203Scognetfloatx80 float32_to_floatx80( float32 a )
1576129203Scognet{
1577129203Scognet    flag aSign;
1578129203Scognet    int16 aExp;
1579129203Scognet    bits32 aSig;
1580129203Scognet
1581129203Scognet    aSig = extractFloat32Frac( a );
1582129203Scognet    aExp = extractFloat32Exp( a );
1583129203Scognet    aSign = extractFloat32Sign( a );
1584129203Scognet    if ( aExp == 0xFF ) {
1585129203Scognet        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1586129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1587129203Scognet    }
1588129203Scognet    if ( aExp == 0 ) {
1589129203Scognet        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1590129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1591129203Scognet    }
1592129203Scognet    aSig |= 0x00800000;
1593129203Scognet    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1594129203Scognet
1595129203Scognet}
1596129203Scognet
1597129203Scognet#endif
1598129203Scognet
1599129203Scognet#ifdef FLOAT128
1600129203Scognet
1601129203Scognet/*
1602129203Scognet-------------------------------------------------------------------------------
1603129203ScognetReturns the result of converting the single-precision floating-point value
1604129203Scognet`a' to the double-precision floating-point format.  The conversion is
1605129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1606129203ScognetArithmetic.
1607129203Scognet-------------------------------------------------------------------------------
1608129203Scognet*/
1609129203Scognetfloat128 float32_to_float128( float32 a )
1610129203Scognet{
1611129203Scognet    flag aSign;
1612129203Scognet    int16 aExp;
1613129203Scognet    bits32 aSig;
1614129203Scognet
1615129203Scognet    aSig = extractFloat32Frac( a );
1616129203Scognet    aExp = extractFloat32Exp( a );
1617129203Scognet    aSign = extractFloat32Sign( a );
1618129203Scognet    if ( aExp == 0xFF ) {
1619129203Scognet        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1620129203Scognet        return packFloat128( aSign, 0x7FFF, 0, 0 );
1621129203Scognet    }
1622129203Scognet    if ( aExp == 0 ) {
1623129203Scognet        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1624129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1625129203Scognet        --aExp;
1626129203Scognet    }
1627129203Scognet    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1628129203Scognet
1629129203Scognet}
1630129203Scognet
1631129203Scognet#endif
1632129203Scognet
1633129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1634129203Scognet/*
1635129203Scognet-------------------------------------------------------------------------------
1636129203ScognetRounds the single-precision floating-point value `a' to an integer, and
1637129203Scognetreturns the result as a single-precision floating-point value.  The
1638129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
1639129203ScognetFloating-Point Arithmetic.
1640129203Scognet-------------------------------------------------------------------------------
1641129203Scognet*/
1642129203Scognetfloat32 float32_round_to_int( float32 a )
1643129203Scognet{
1644129203Scognet    flag aSign;
1645129203Scognet    int16 aExp;
1646129203Scognet    bits32 lastBitMask, roundBitsMask;
1647129203Scognet    int8 roundingMode;
1648129203Scognet    float32 z;
1649129203Scognet
1650129203Scognet    aExp = extractFloat32Exp( a );
1651129203Scognet    if ( 0x96 <= aExp ) {
1652129203Scognet        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1653129203Scognet            return propagateFloat32NaN( a, a );
1654129203Scognet        }
1655129203Scognet        return a;
1656129203Scognet    }
1657129203Scognet    if ( aExp <= 0x7E ) {
1658129203Scognet        if ( (bits32) ( a<<1 ) == 0 ) return a;
1659129203Scognet        float_exception_flags |= float_flag_inexact;
1660129203Scognet        aSign = extractFloat32Sign( a );
1661129203Scognet        switch ( float_rounding_mode ) {
1662129203Scognet         case float_round_nearest_even:
1663129203Scognet            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1664129203Scognet                return packFloat32( aSign, 0x7F, 0 );
1665129203Scognet            }
1666129203Scognet            break;
1667129203Scognet	 case float_round_to_zero:
1668129203Scognet	    break;
1669129203Scognet         case float_round_down:
1670129203Scognet            return aSign ? 0xBF800000 : 0;
1671129203Scognet         case float_round_up:
1672129203Scognet            return aSign ? 0x80000000 : 0x3F800000;
1673129203Scognet        }
1674129203Scognet        return packFloat32( aSign, 0, 0 );
1675129203Scognet    }
1676129203Scognet    lastBitMask = 1;
1677129203Scognet    lastBitMask <<= 0x96 - aExp;
1678129203Scognet    roundBitsMask = lastBitMask - 1;
1679129203Scognet    z = a;
1680129203Scognet    roundingMode = float_rounding_mode;
1681129203Scognet    if ( roundingMode == float_round_nearest_even ) {
1682129203Scognet        z += lastBitMask>>1;
1683129203Scognet        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1684129203Scognet    }
1685129203Scognet    else if ( roundingMode != float_round_to_zero ) {
1686129203Scognet        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1687129203Scognet            z += roundBitsMask;
1688129203Scognet        }
1689129203Scognet    }
1690129203Scognet    z &= ~ roundBitsMask;
1691129203Scognet    if ( z != a ) float_exception_flags |= float_flag_inexact;
1692129203Scognet    return z;
1693129203Scognet
1694129203Scognet}
1695129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1696129203Scognet
1697129203Scognet/*
1698129203Scognet-------------------------------------------------------------------------------
1699129203ScognetReturns the result of adding the absolute values of the single-precision
1700129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1701129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
1702129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
1703129203ScognetFloating-Point Arithmetic.
1704129203Scognet-------------------------------------------------------------------------------
1705129203Scognet*/
1706129203Scognetstatic float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1707129203Scognet{
1708129203Scognet    int16 aExp, bExp, zExp;
1709129203Scognet    bits32 aSig, bSig, zSig;
1710129203Scognet    int16 expDiff;
1711129203Scognet
1712129203Scognet    aSig = extractFloat32Frac( a );
1713129203Scognet    aExp = extractFloat32Exp( a );
1714129203Scognet    bSig = extractFloat32Frac( b );
1715129203Scognet    bExp = extractFloat32Exp( b );
1716129203Scognet    expDiff = aExp - bExp;
1717129203Scognet    aSig <<= 6;
1718129203Scognet    bSig <<= 6;
1719129203Scognet    if ( 0 < expDiff ) {
1720129203Scognet        if ( aExp == 0xFF ) {
1721129203Scognet            if ( aSig ) return propagateFloat32NaN( a, b );
1722129203Scognet            return a;
1723129203Scognet        }
1724129203Scognet        if ( bExp == 0 ) {
1725129203Scognet            --expDiff;
1726129203Scognet        }
1727129203Scognet        else {
1728129203Scognet            bSig |= 0x20000000;
1729129203Scognet        }
1730129203Scognet        shift32RightJamming( bSig, expDiff, &bSig );
1731129203Scognet        zExp = aExp;
1732129203Scognet    }
1733129203Scognet    else if ( expDiff < 0 ) {
1734129203Scognet        if ( bExp == 0xFF ) {
1735129203Scognet            if ( bSig ) return propagateFloat32NaN( a, b );
1736129203Scognet            return packFloat32( zSign, 0xFF, 0 );
1737129203Scognet        }
1738129203Scognet        if ( aExp == 0 ) {
1739129203Scognet            ++expDiff;
1740129203Scognet        }
1741129203Scognet        else {
1742129203Scognet            aSig |= 0x20000000;
1743129203Scognet        }
1744129203Scognet        shift32RightJamming( aSig, - expDiff, &aSig );
1745129203Scognet        zExp = bExp;
1746129203Scognet    }
1747129203Scognet    else {
1748129203Scognet        if ( aExp == 0xFF ) {
1749129203Scognet            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1750129203Scognet            return a;
1751129203Scognet        }
1752129203Scognet        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1753129203Scognet        zSig = 0x40000000 + aSig + bSig;
1754129203Scognet        zExp = aExp;
1755129203Scognet        goto roundAndPack;
1756129203Scognet    }
1757129203Scognet    aSig |= 0x20000000;
1758129203Scognet    zSig = ( aSig + bSig )<<1;
1759129203Scognet    --zExp;
1760129203Scognet    if ( (sbits32) zSig < 0 ) {
1761129203Scognet        zSig = aSig + bSig;
1762129203Scognet        ++zExp;
1763129203Scognet    }
1764129203Scognet roundAndPack:
1765129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
1766129203Scognet
1767129203Scognet}
1768129203Scognet
1769129203Scognet/*
1770129203Scognet-------------------------------------------------------------------------------
1771129203ScognetReturns the result of subtracting the absolute values of the single-
1772129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
1773129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
1774129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
1775129203ScognetStandard for Binary Floating-Point Arithmetic.
1776129203Scognet-------------------------------------------------------------------------------
1777129203Scognet*/
1778129203Scognetstatic float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1779129203Scognet{
1780129203Scognet    int16 aExp, bExp, zExp;
1781129203Scognet    bits32 aSig, bSig, zSig;
1782129203Scognet    int16 expDiff;
1783129203Scognet
1784129203Scognet    aSig = extractFloat32Frac( a );
1785129203Scognet    aExp = extractFloat32Exp( a );
1786129203Scognet    bSig = extractFloat32Frac( b );
1787129203Scognet    bExp = extractFloat32Exp( b );
1788129203Scognet    expDiff = aExp - bExp;
1789129203Scognet    aSig <<= 7;
1790129203Scognet    bSig <<= 7;
1791129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
1792129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
1793129203Scognet    if ( aExp == 0xFF ) {
1794129203Scognet        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1795129203Scognet        float_raise( float_flag_invalid );
1796129203Scognet        return float32_default_nan;
1797129203Scognet    }
1798129203Scognet    if ( aExp == 0 ) {
1799129203Scognet        aExp = 1;
1800129203Scognet        bExp = 1;
1801129203Scognet    }
1802129203Scognet    if ( bSig < aSig ) goto aBigger;
1803129203Scognet    if ( aSig < bSig ) goto bBigger;
1804129203Scognet    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
1805129203Scognet bExpBigger:
1806129203Scognet    if ( bExp == 0xFF ) {
1807129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
1808129203Scognet        return packFloat32( zSign ^ 1, 0xFF, 0 );
1809129203Scognet    }
1810129203Scognet    if ( aExp == 0 ) {
1811129203Scognet        ++expDiff;
1812129203Scognet    }
1813129203Scognet    else {
1814129203Scognet        aSig |= 0x40000000;
1815129203Scognet    }
1816129203Scognet    shift32RightJamming( aSig, - expDiff, &aSig );
1817129203Scognet    bSig |= 0x40000000;
1818129203Scognet bBigger:
1819129203Scognet    zSig = bSig - aSig;
1820129203Scognet    zExp = bExp;
1821129203Scognet    zSign ^= 1;
1822129203Scognet    goto normalizeRoundAndPack;
1823129203Scognet aExpBigger:
1824129203Scognet    if ( aExp == 0xFF ) {
1825129203Scognet        if ( aSig ) return propagateFloat32NaN( a, b );
1826129203Scognet        return a;
1827129203Scognet    }
1828129203Scognet    if ( bExp == 0 ) {
1829129203Scognet        --expDiff;
1830129203Scognet    }
1831129203Scognet    else {
1832129203Scognet        bSig |= 0x40000000;
1833129203Scognet    }
1834129203Scognet    shift32RightJamming( bSig, expDiff, &bSig );
1835129203Scognet    aSig |= 0x40000000;
1836129203Scognet aBigger:
1837129203Scognet    zSig = aSig - bSig;
1838129203Scognet    zExp = aExp;
1839129203Scognet normalizeRoundAndPack:
1840129203Scognet    --zExp;
1841129203Scognet    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1842129203Scognet
1843129203Scognet}
1844129203Scognet
1845129203Scognet/*
1846129203Scognet-------------------------------------------------------------------------------
1847129203ScognetReturns the result of adding the single-precision floating-point values `a'
1848129203Scognetand `b'.  The operation is performed according to the IEC/IEEE Standard for
1849129203ScognetBinary Floating-Point Arithmetic.
1850129203Scognet-------------------------------------------------------------------------------
1851129203Scognet*/
1852129203Scognetfloat32 float32_add( float32 a, float32 b )
1853129203Scognet{
1854129203Scognet    flag aSign, bSign;
1855129203Scognet
1856129203Scognet    aSign = extractFloat32Sign( a );
1857129203Scognet    bSign = extractFloat32Sign( b );
1858129203Scognet    if ( aSign == bSign ) {
1859129203Scognet        return addFloat32Sigs( a, b, aSign );
1860129203Scognet    }
1861129203Scognet    else {
1862129203Scognet        return subFloat32Sigs( a, b, aSign );
1863129203Scognet    }
1864129203Scognet
1865129203Scognet}
1866129203Scognet
1867129203Scognet/*
1868129203Scognet-------------------------------------------------------------------------------
1869129203ScognetReturns the result of subtracting the single-precision floating-point values
1870129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1871129203Scognetfor Binary Floating-Point Arithmetic.
1872129203Scognet-------------------------------------------------------------------------------
1873129203Scognet*/
1874129203Scognetfloat32 float32_sub( float32 a, float32 b )
1875129203Scognet{
1876129203Scognet    flag aSign, bSign;
1877129203Scognet
1878129203Scognet    aSign = extractFloat32Sign( a );
1879129203Scognet    bSign = extractFloat32Sign( b );
1880129203Scognet    if ( aSign == bSign ) {
1881129203Scognet        return subFloat32Sigs( a, b, aSign );
1882129203Scognet    }
1883129203Scognet    else {
1884129203Scognet        return addFloat32Sigs( a, b, aSign );
1885129203Scognet    }
1886129203Scognet
1887129203Scognet}
1888129203Scognet
1889129203Scognet/*
1890129203Scognet-------------------------------------------------------------------------------
1891129203ScognetReturns the result of multiplying the single-precision floating-point values
1892129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1893129203Scognetfor Binary Floating-Point Arithmetic.
1894129203Scognet-------------------------------------------------------------------------------
1895129203Scognet*/
1896129203Scognetfloat32 float32_mul( float32 a, float32 b )
1897129203Scognet{
1898129203Scognet    flag aSign, bSign, zSign;
1899129203Scognet    int16 aExp, bExp, zExp;
1900129203Scognet    bits32 aSig, bSig;
1901129203Scognet    bits64 zSig64;
1902129203Scognet    bits32 zSig;
1903129203Scognet
1904129203Scognet    aSig = extractFloat32Frac( a );
1905129203Scognet    aExp = extractFloat32Exp( a );
1906129203Scognet    aSign = extractFloat32Sign( a );
1907129203Scognet    bSig = extractFloat32Frac( b );
1908129203Scognet    bExp = extractFloat32Exp( b );
1909129203Scognet    bSign = extractFloat32Sign( b );
1910129203Scognet    zSign = aSign ^ bSign;
1911129203Scognet    if ( aExp == 0xFF ) {
1912129203Scognet        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1913129203Scognet            return propagateFloat32NaN( a, b );
1914129203Scognet        }
1915129203Scognet        if ( ( bExp | bSig ) == 0 ) {
1916129203Scognet            float_raise( float_flag_invalid );
1917129203Scognet            return float32_default_nan;
1918129203Scognet        }
1919129203Scognet        return packFloat32( zSign, 0xFF, 0 );
1920129203Scognet    }
1921129203Scognet    if ( bExp == 0xFF ) {
1922129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
1923129203Scognet        if ( ( aExp | aSig ) == 0 ) {
1924129203Scognet            float_raise( float_flag_invalid );
1925129203Scognet            return float32_default_nan;
1926129203Scognet        }
1927129203Scognet        return packFloat32( zSign, 0xFF, 0 );
1928129203Scognet    }
1929129203Scognet    if ( aExp == 0 ) {
1930129203Scognet        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1931129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1932129203Scognet    }
1933129203Scognet    if ( bExp == 0 ) {
1934129203Scognet        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1935129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1936129203Scognet    }
1937129203Scognet    zExp = aExp + bExp - 0x7F;
1938129203Scognet    aSig = ( aSig | 0x00800000 )<<7;
1939129203Scognet    bSig = ( bSig | 0x00800000 )<<8;
1940129203Scognet    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1941129203Scognet    zSig = zSig64;
1942129203Scognet    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1943129203Scognet        zSig <<= 1;
1944129203Scognet        --zExp;
1945129203Scognet    }
1946129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
1947129203Scognet
1948129203Scognet}
1949129203Scognet
1950129203Scognet/*
1951129203Scognet-------------------------------------------------------------------------------
1952129203ScognetReturns the result of dividing the single-precision floating-point value `a'
1953129203Scognetby the corresponding value `b'.  The operation is performed according to the
1954129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
1955129203Scognet-------------------------------------------------------------------------------
1956129203Scognet*/
1957129203Scognetfloat32 float32_div( float32 a, float32 b )
1958129203Scognet{
1959129203Scognet    flag aSign, bSign, zSign;
1960129203Scognet    int16 aExp, bExp, zExp;
1961129203Scognet    bits32 aSig, bSig, zSig;
1962129203Scognet
1963129203Scognet    aSig = extractFloat32Frac( a );
1964129203Scognet    aExp = extractFloat32Exp( a );
1965129203Scognet    aSign = extractFloat32Sign( a );
1966129203Scognet    bSig = extractFloat32Frac( b );
1967129203Scognet    bExp = extractFloat32Exp( b );
1968129203Scognet    bSign = extractFloat32Sign( b );
1969129203Scognet    zSign = aSign ^ bSign;
1970129203Scognet    if ( aExp == 0xFF ) {
1971129203Scognet        if ( aSig ) return propagateFloat32NaN( a, b );
1972129203Scognet        if ( bExp == 0xFF ) {
1973129203Scognet            if ( bSig ) return propagateFloat32NaN( a, b );
1974129203Scognet            float_raise( float_flag_invalid );
1975129203Scognet            return float32_default_nan;
1976129203Scognet        }
1977129203Scognet        return packFloat32( zSign, 0xFF, 0 );
1978129203Scognet    }
1979129203Scognet    if ( bExp == 0xFF ) {
1980129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
1981129203Scognet        return packFloat32( zSign, 0, 0 );
1982129203Scognet    }
1983129203Scognet    if ( bExp == 0 ) {
1984129203Scognet        if ( bSig == 0 ) {
1985129203Scognet            if ( ( aExp | aSig ) == 0 ) {
1986129203Scognet                float_raise( float_flag_invalid );
1987129203Scognet                return float32_default_nan;
1988129203Scognet            }
1989129203Scognet            float_raise( float_flag_divbyzero );
1990129203Scognet            return packFloat32( zSign, 0xFF, 0 );
1991129203Scognet        }
1992129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1993129203Scognet    }
1994129203Scognet    if ( aExp == 0 ) {
1995129203Scognet        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1996129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1997129203Scognet    }
1998129203Scognet    zExp = aExp - bExp + 0x7D;
1999129203Scognet    aSig = ( aSig | 0x00800000 )<<7;
2000129203Scognet    bSig = ( bSig | 0x00800000 )<<8;
2001129203Scognet    if ( bSig <= ( aSig + aSig ) ) {
2002129203Scognet        aSig >>= 1;
2003129203Scognet        ++zExp;
2004129203Scognet    }
2005129203Scognet    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
2006129203Scognet    if ( ( zSig & 0x3F ) == 0 ) {
2007129203Scognet        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
2008129203Scognet    }
2009129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
2010129203Scognet
2011129203Scognet}
2012129203Scognet
2013129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2014129203Scognet/*
2015129203Scognet-------------------------------------------------------------------------------
2016129203ScognetReturns the remainder of the single-precision floating-point value `a'
2017129203Scognetwith respect to the corresponding value `b'.  The operation is performed
2018129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2019129203Scognet-------------------------------------------------------------------------------
2020129203Scognet*/
2021129203Scognetfloat32 float32_rem( float32 a, float32 b )
2022129203Scognet{
2023129203Scognet    flag aSign, bSign, zSign;
2024129203Scognet    int16 aExp, bExp, expDiff;
2025129203Scognet    bits32 aSig, bSig;
2026129203Scognet    bits32 q;
2027129203Scognet    bits64 aSig64, bSig64, q64;
2028129203Scognet    bits32 alternateASig;
2029129203Scognet    sbits32 sigMean;
2030129203Scognet
2031129203Scognet    aSig = extractFloat32Frac( a );
2032129203Scognet    aExp = extractFloat32Exp( a );
2033129203Scognet    aSign = extractFloat32Sign( a );
2034129203Scognet    bSig = extractFloat32Frac( b );
2035129203Scognet    bExp = extractFloat32Exp( b );
2036129203Scognet    bSign = extractFloat32Sign( b );
2037129203Scognet    if ( aExp == 0xFF ) {
2038129203Scognet        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2039129203Scognet            return propagateFloat32NaN( a, b );
2040129203Scognet        }
2041129203Scognet        float_raise( float_flag_invalid );
2042129203Scognet        return float32_default_nan;
2043129203Scognet    }
2044129203Scognet    if ( bExp == 0xFF ) {
2045129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
2046129203Scognet        return a;
2047129203Scognet    }
2048129203Scognet    if ( bExp == 0 ) {
2049129203Scognet        if ( bSig == 0 ) {
2050129203Scognet            float_raise( float_flag_invalid );
2051129203Scognet            return float32_default_nan;
2052129203Scognet        }
2053129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2054129203Scognet    }
2055129203Scognet    if ( aExp == 0 ) {
2056129203Scognet        if ( aSig == 0 ) return a;
2057129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2058129203Scognet    }
2059129203Scognet    expDiff = aExp - bExp;
2060129203Scognet    aSig |= 0x00800000;
2061129203Scognet    bSig |= 0x00800000;
2062129203Scognet    if ( expDiff < 32 ) {
2063129203Scognet        aSig <<= 8;
2064129203Scognet        bSig <<= 8;
2065129203Scognet        if ( expDiff < 0 ) {
2066129203Scognet            if ( expDiff < -1 ) return a;
2067129203Scognet            aSig >>= 1;
2068129203Scognet        }
2069129203Scognet        q = ( bSig <= aSig );
2070129203Scognet        if ( q ) aSig -= bSig;
2071129203Scognet        if ( 0 < expDiff ) {
2072129203Scognet            q = ( ( (bits64) aSig )<<32 ) / bSig;
2073129203Scognet            q >>= 32 - expDiff;
2074129203Scognet            bSig >>= 2;
2075129203Scognet            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2076129203Scognet        }
2077129203Scognet        else {
2078129203Scognet            aSig >>= 2;
2079129203Scognet            bSig >>= 2;
2080129203Scognet        }
2081129203Scognet    }
2082129203Scognet    else {
2083129203Scognet        if ( bSig <= aSig ) aSig -= bSig;
2084129203Scognet        aSig64 = ( (bits64) aSig )<<40;
2085129203Scognet        bSig64 = ( (bits64) bSig )<<40;
2086129203Scognet        expDiff -= 64;
2087129203Scognet        while ( 0 < expDiff ) {
2088129203Scognet            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2089129203Scognet            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2090129203Scognet            aSig64 = - ( ( bSig * q64 )<<38 );
2091129203Scognet            expDiff -= 62;
2092129203Scognet        }
2093129203Scognet        expDiff += 64;
2094129203Scognet        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2095129203Scognet        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2096129203Scognet        q = q64>>( 64 - expDiff );
2097129203Scognet        bSig <<= 6;
2098129203Scognet        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2099129203Scognet    }
2100129203Scognet    do {
2101129203Scognet        alternateASig = aSig;
2102129203Scognet        ++q;
2103129203Scognet        aSig -= bSig;
2104129203Scognet    } while ( 0 <= (sbits32) aSig );
2105129203Scognet    sigMean = aSig + alternateASig;
2106129203Scognet    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2107129203Scognet        aSig = alternateASig;
2108129203Scognet    }
2109129203Scognet    zSign = ( (sbits32) aSig < 0 );
2110129203Scognet    if ( zSign ) aSig = - aSig;
2111129203Scognet    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2112129203Scognet
2113129203Scognet}
2114129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2115129203Scognet
2116129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2117129203Scognet/*
2118129203Scognet-------------------------------------------------------------------------------
2119129203ScognetReturns the square root of the single-precision floating-point value `a'.
2120129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
2121129203ScognetFloating-Point Arithmetic.
2122129203Scognet-------------------------------------------------------------------------------
2123129203Scognet*/
2124129203Scognetfloat32 float32_sqrt( float32 a )
2125129203Scognet{
2126129203Scognet    flag aSign;
2127129203Scognet    int16 aExp, zExp;
2128129203Scognet    bits32 aSig, zSig;
2129129203Scognet    bits64 rem, term;
2130129203Scognet
2131129203Scognet    aSig = extractFloat32Frac( a );
2132129203Scognet    aExp = extractFloat32Exp( a );
2133129203Scognet    aSign = extractFloat32Sign( a );
2134129203Scognet    if ( aExp == 0xFF ) {
2135129203Scognet        if ( aSig ) return propagateFloat32NaN( a, 0 );
2136129203Scognet        if ( ! aSign ) return a;
2137129203Scognet        float_raise( float_flag_invalid );
2138129203Scognet        return float32_default_nan;
2139129203Scognet    }
2140129203Scognet    if ( aSign ) {
2141129203Scognet        if ( ( aExp | aSig ) == 0 ) return a;
2142129203Scognet        float_raise( float_flag_invalid );
2143129203Scognet        return float32_default_nan;
2144129203Scognet    }
2145129203Scognet    if ( aExp == 0 ) {
2146129203Scognet        if ( aSig == 0 ) return 0;
2147129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2148129203Scognet    }
2149129203Scognet    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2150129203Scognet    aSig = ( aSig | 0x00800000 )<<8;
2151129203Scognet    zSig = estimateSqrt32( aExp, aSig ) + 2;
2152129203Scognet    if ( ( zSig & 0x7F ) <= 5 ) {
2153129203Scognet        if ( zSig < 2 ) {
2154129203Scognet            zSig = 0x7FFFFFFF;
2155129203Scognet            goto roundAndPack;
2156129203Scognet        }
2157129203Scognet        aSig >>= aExp & 1;
2158129203Scognet        term = ( (bits64) zSig ) * zSig;
2159129203Scognet        rem = ( ( (bits64) aSig )<<32 ) - term;
2160129203Scognet        while ( (sbits64) rem < 0 ) {
2161129203Scognet            --zSig;
2162129203Scognet            rem += ( ( (bits64) zSig )<<1 ) | 1;
2163129203Scognet        }
2164129203Scognet        zSig |= ( rem != 0 );
2165129203Scognet    }
2166129203Scognet    shift32RightJamming( zSig, 1, &zSig );
2167129203Scognet roundAndPack:
2168129203Scognet    return roundAndPackFloat32( 0, zExp, zSig );
2169129203Scognet
2170129203Scognet}
2171129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2172129203Scognet
2173129203Scognet/*
2174129203Scognet-------------------------------------------------------------------------------
2175129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to
2176129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
2177129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2178129203Scognet-------------------------------------------------------------------------------
2179129203Scognet*/
2180129203Scognetflag float32_eq( float32 a, float32 b )
2181129203Scognet{
2182129203Scognet
2183129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2184129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2185129203Scognet       ) {
2186129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2187129203Scognet            float_raise( float_flag_invalid );
2188129203Scognet        }
2189129203Scognet        return 0;
2190129203Scognet    }
2191129203Scognet    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2192129203Scognet
2193129203Scognet}
2194129203Scognet
2195129203Scognet/*
2196129203Scognet-------------------------------------------------------------------------------
2197129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
2198129203Scognetor equal to the corresponding value `b', and 0 otherwise.  The comparison
2199129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
2200129203ScognetArithmetic.
2201129203Scognet-------------------------------------------------------------------------------
2202129203Scognet*/
2203129203Scognetflag float32_le( float32 a, float32 b )
2204129203Scognet{
2205129203Scognet    flag aSign, bSign;
2206129203Scognet
2207129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2208129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2209129203Scognet       ) {
2210129203Scognet        float_raise( float_flag_invalid );
2211129203Scognet        return 0;
2212129203Scognet    }
2213129203Scognet    aSign = extractFloat32Sign( a );
2214129203Scognet    bSign = extractFloat32Sign( b );
2215129203Scognet    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2216129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
2217129203Scognet
2218129203Scognet}
2219129203Scognet
2220129203Scognet/*
2221129203Scognet-------------------------------------------------------------------------------
2222129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
2223129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
2224129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2225129203Scognet-------------------------------------------------------------------------------
2226129203Scognet*/
2227129203Scognetflag float32_lt( float32 a, float32 b )
2228129203Scognet{
2229129203Scognet    flag aSign, bSign;
2230129203Scognet
2231129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2232129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2233129203Scognet       ) {
2234129203Scognet        float_raise( float_flag_invalid );
2235129203Scognet        return 0;
2236129203Scognet    }
2237129203Scognet    aSign = extractFloat32Sign( a );
2238129203Scognet    bSign = extractFloat32Sign( b );
2239129203Scognet    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2240129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
2241129203Scognet
2242129203Scognet}
2243129203Scognet
2244129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2245129203Scognet/*
2246129203Scognet-------------------------------------------------------------------------------
2247129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to
2248129203Scognetthe corresponding value `b', and 0 otherwise.  The invalid exception is
2249129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
2250129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2251129203Scognet-------------------------------------------------------------------------------
2252129203Scognet*/
2253129203Scognetflag float32_eq_signaling( float32 a, float32 b )
2254129203Scognet{
2255129203Scognet
2256129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2257129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2258129203Scognet       ) {
2259129203Scognet        float_raise( float_flag_invalid );
2260129203Scognet        return 0;
2261129203Scognet    }
2262129203Scognet    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2263129203Scognet
2264129203Scognet}
2265129203Scognet
2266129203Scognet/*
2267129203Scognet-------------------------------------------------------------------------------
2268129203ScognetReturns 1 if the single-precision floating-point value `a' is less than or
2269129203Scognetequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2270129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
2271129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
2272129203Scognet-------------------------------------------------------------------------------
2273129203Scognet*/
2274129203Scognetflag float32_le_quiet( float32 a, float32 b )
2275129203Scognet{
2276129203Scognet    flag aSign, bSign;
2277129203Scognet
2278129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2279129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2280129203Scognet       ) {
2281129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2282129203Scognet            float_raise( float_flag_invalid );
2283129203Scognet        }
2284129203Scognet        return 0;
2285129203Scognet    }
2286129203Scognet    aSign = extractFloat32Sign( a );
2287129203Scognet    bSign = extractFloat32Sign( b );
2288129203Scognet    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2289129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
2290129203Scognet
2291129203Scognet}
2292129203Scognet
2293129203Scognet/*
2294129203Scognet-------------------------------------------------------------------------------
2295129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
2296129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2297129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
2298129203ScognetStandard for Binary Floating-Point Arithmetic.
2299129203Scognet-------------------------------------------------------------------------------
2300129203Scognet*/
2301129203Scognetflag float32_lt_quiet( float32 a, float32 b )
2302129203Scognet{
2303129203Scognet    flag aSign, bSign;
2304129203Scognet
2305129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2306129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2307129203Scognet       ) {
2308129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2309129203Scognet            float_raise( float_flag_invalid );
2310129203Scognet        }
2311129203Scognet        return 0;
2312129203Scognet    }
2313129203Scognet    aSign = extractFloat32Sign( a );
2314129203Scognet    bSign = extractFloat32Sign( b );
2315129203Scognet    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2316129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
2317129203Scognet
2318129203Scognet}
2319129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2320129203Scognet
2321129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2322129203Scognet/*
2323129203Scognet-------------------------------------------------------------------------------
2324129203ScognetReturns the result of converting the double-precision floating-point value
2325129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
2326129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2327129203ScognetArithmetic---which means in particular that the conversion is rounded
2328129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
2329129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
2330129203Scognetlargest integer with the same sign as `a' is returned.
2331129203Scognet-------------------------------------------------------------------------------
2332129203Scognet*/
2333129203Scognetint32 float64_to_int32( float64 a )
2334129203Scognet{
2335129203Scognet    flag aSign;
2336129203Scognet    int16 aExp, shiftCount;
2337129203Scognet    bits64 aSig;
2338129203Scognet
2339129203Scognet    aSig = extractFloat64Frac( a );
2340129203Scognet    aExp = extractFloat64Exp( a );
2341129203Scognet    aSign = extractFloat64Sign( a );
2342129203Scognet    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2343129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2344129203Scognet    shiftCount = 0x42C - aExp;
2345129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2346129203Scognet    return roundAndPackInt32( aSign, aSig );
2347129203Scognet
2348129203Scognet}
2349129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2350129203Scognet
2351129203Scognet/*
2352129203Scognet-------------------------------------------------------------------------------
2353129203ScognetReturns the result of converting the double-precision floating-point value
2354129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
2355129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2356129203ScognetArithmetic, except that the conversion is always rounded toward zero.
2357129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2358129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
2359129203Scognetreturned.
2360129203Scognet-------------------------------------------------------------------------------
2361129203Scognet*/
2362129203Scognetint32 float64_to_int32_round_to_zero( float64 a )
2363129203Scognet{
2364129203Scognet    flag aSign;
2365129203Scognet    int16 aExp, shiftCount;
2366129203Scognet    bits64 aSig, savedASig;
2367129203Scognet    int32 z;
2368129203Scognet
2369129203Scognet    aSig = extractFloat64Frac( a );
2370129203Scognet    aExp = extractFloat64Exp( a );
2371129203Scognet    aSign = extractFloat64Sign( a );
2372129203Scognet    if ( 0x41E < aExp ) {
2373129203Scognet        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2374129203Scognet        goto invalid;
2375129203Scognet    }
2376129203Scognet    else if ( aExp < 0x3FF ) {
2377129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
2378129203Scognet        return 0;
2379129203Scognet    }
2380129203Scognet    aSig |= LIT64( 0x0010000000000000 );
2381129203Scognet    shiftCount = 0x433 - aExp;
2382129203Scognet    savedASig = aSig;
2383129203Scognet    aSig >>= shiftCount;
2384129203Scognet    z = aSig;
2385129203Scognet    if ( aSign ) z = - z;
2386129203Scognet    if ( ( z < 0 ) ^ aSign ) {
2387129203Scognet invalid:
2388129203Scognet        float_raise( float_flag_invalid );
2389129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2390129203Scognet    }
2391129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
2392129203Scognet        float_exception_flags |= float_flag_inexact;
2393129203Scognet    }
2394129203Scognet    return z;
2395129203Scognet
2396129203Scognet}
2397129203Scognet
2398129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2399129203Scognet/*
2400129203Scognet-------------------------------------------------------------------------------
2401129203ScognetReturns the result of converting the double-precision floating-point value
2402129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
2403129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2404129203ScognetArithmetic---which means in particular that the conversion is rounded
2405129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
2406129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
2407129203Scognetlargest integer with the same sign as `a' is returned.
2408129203Scognet-------------------------------------------------------------------------------
2409129203Scognet*/
2410129203Scognetint64 float64_to_int64( float64 a )
2411129203Scognet{
2412129203Scognet    flag aSign;
2413129203Scognet    int16 aExp, shiftCount;
2414129203Scognet    bits64 aSig, aSigExtra;
2415129203Scognet
2416129203Scognet    aSig = extractFloat64Frac( a );
2417129203Scognet    aExp = extractFloat64Exp( a );
2418129203Scognet    aSign = extractFloat64Sign( a );
2419129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2420129203Scognet    shiftCount = 0x433 - aExp;
2421129203Scognet    if ( shiftCount <= 0 ) {
2422129203Scognet        if ( 0x43E < aExp ) {
2423129203Scognet            float_raise( float_flag_invalid );
2424129203Scognet            if (    ! aSign
2425129203Scognet                 || (    ( aExp == 0x7FF )
2426129203Scognet                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2427129203Scognet               ) {
2428129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
2429129203Scognet            }
2430129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
2431129203Scognet        }
2432129203Scognet        aSigExtra = 0;
2433129203Scognet        aSig <<= - shiftCount;
2434129203Scognet    }
2435129203Scognet    else {
2436129203Scognet        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2437129203Scognet    }
2438129203Scognet    return roundAndPackInt64( aSign, aSig, aSigExtra );
2439129203Scognet
2440129203Scognet}
2441129203Scognet
2442129203Scognet/*
2443129203Scognet-------------------------------------------------------------------------------
2444129203ScognetReturns the result of converting the double-precision floating-point value
2445129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
2446129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2447129203ScognetArithmetic, except that the conversion is always rounded toward zero.
2448129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2449129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
2450129203Scognetreturned.
2451129203Scognet-------------------------------------------------------------------------------
2452129203Scognet*/
2453129203Scognetint64 float64_to_int64_round_to_zero( float64 a )
2454129203Scognet{
2455129203Scognet    flag aSign;
2456129203Scognet    int16 aExp, shiftCount;
2457129203Scognet    bits64 aSig;
2458129203Scognet    int64 z;
2459129203Scognet
2460129203Scognet    aSig = extractFloat64Frac( a );
2461129203Scognet    aExp = extractFloat64Exp( a );
2462129203Scognet    aSign = extractFloat64Sign( a );
2463129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2464129203Scognet    shiftCount = aExp - 0x433;
2465129203Scognet    if ( 0 <= shiftCount ) {
2466129203Scognet        if ( 0x43E <= aExp ) {
2467129203Scognet            if ( a != LIT64( 0xC3E0000000000000 ) ) {
2468129203Scognet                float_raise( float_flag_invalid );
2469129203Scognet                if (    ! aSign
2470129203Scognet                     || (    ( aExp == 0x7FF )
2471129203Scognet                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2472129203Scognet                   ) {
2473129203Scognet                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2474129203Scognet                }
2475129203Scognet            }
2476129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
2477129203Scognet        }
2478129203Scognet        z = aSig<<shiftCount;
2479129203Scognet    }
2480129203Scognet    else {
2481129203Scognet        if ( aExp < 0x3FE ) {
2482129203Scognet            if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
2483129203Scognet            return 0;
2484129203Scognet        }
2485129203Scognet        z = aSig>>( - shiftCount );
2486129203Scognet        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2487129203Scognet            float_exception_flags |= float_flag_inexact;
2488129203Scognet        }
2489129203Scognet    }
2490129203Scognet    if ( aSign ) z = - z;
2491129203Scognet    return z;
2492129203Scognet
2493129203Scognet}
2494129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2495129203Scognet
2496129203Scognet/*
2497129203Scognet-------------------------------------------------------------------------------
2498129203ScognetReturns the result of converting the double-precision floating-point value
2499129203Scognet`a' to the single-precision floating-point format.  The conversion is
2500129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2501129203ScognetArithmetic.
2502129203Scognet-------------------------------------------------------------------------------
2503129203Scognet*/
2504129203Scognetfloat32 float64_to_float32( float64 a )
2505129203Scognet{
2506129203Scognet    flag aSign;
2507129203Scognet    int16 aExp;
2508129203Scognet    bits64 aSig;
2509129203Scognet    bits32 zSig;
2510129203Scognet
2511129203Scognet    aSig = extractFloat64Frac( a );
2512129203Scognet    aExp = extractFloat64Exp( a );
2513129203Scognet    aSign = extractFloat64Sign( a );
2514129203Scognet    if ( aExp == 0x7FF ) {
2515129203Scognet        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2516129203Scognet        return packFloat32( aSign, 0xFF, 0 );
2517129203Scognet    }
2518129203Scognet    shift64RightJamming( aSig, 22, &aSig );
2519129203Scognet    zSig = aSig;
2520129203Scognet    if ( aExp || zSig ) {
2521129203Scognet        zSig |= 0x40000000;
2522129203Scognet        aExp -= 0x381;
2523129203Scognet    }
2524129203Scognet    return roundAndPackFloat32( aSign, aExp, zSig );
2525129203Scognet
2526129203Scognet}
2527129203Scognet
2528129203Scognet#ifdef FLOATX80
2529129203Scognet
2530129203Scognet/*
2531129203Scognet-------------------------------------------------------------------------------
2532129203ScognetReturns the result of converting the double-precision floating-point value
2533129203Scognet`a' to the extended double-precision floating-point format.  The conversion
2534129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
2535129203ScognetArithmetic.
2536129203Scognet-------------------------------------------------------------------------------
2537129203Scognet*/
2538129203Scognetfloatx80 float64_to_floatx80( float64 a )
2539129203Scognet{
2540129203Scognet    flag aSign;
2541129203Scognet    int16 aExp;
2542129203Scognet    bits64 aSig;
2543129203Scognet
2544129203Scognet    aSig = extractFloat64Frac( a );
2545129203Scognet    aExp = extractFloat64Exp( a );
2546129203Scognet    aSign = extractFloat64Sign( a );
2547129203Scognet    if ( aExp == 0x7FF ) {
2548129203Scognet        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2549129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2550129203Scognet    }
2551129203Scognet    if ( aExp == 0 ) {
2552129203Scognet        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2553129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2554129203Scognet    }
2555129203Scognet    return
2556129203Scognet        packFloatx80(
2557129203Scognet            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2558129203Scognet
2559129203Scognet}
2560129203Scognet
2561129203Scognet#endif
2562129203Scognet
2563129203Scognet#ifdef FLOAT128
2564129203Scognet
2565129203Scognet/*
2566129203Scognet-------------------------------------------------------------------------------
2567129203ScognetReturns the result of converting the double-precision floating-point value
2568129203Scognet`a' to the quadruple-precision floating-point format.  The conversion is
2569129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2570129203ScognetArithmetic.
2571129203Scognet-------------------------------------------------------------------------------
2572129203Scognet*/
2573129203Scognetfloat128 float64_to_float128( float64 a )
2574129203Scognet{
2575129203Scognet    flag aSign;
2576129203Scognet    int16 aExp;
2577129203Scognet    bits64 aSig, zSig0, zSig1;
2578129203Scognet
2579129203Scognet    aSig = extractFloat64Frac( a );
2580129203Scognet    aExp = extractFloat64Exp( a );
2581129203Scognet    aSign = extractFloat64Sign( a );
2582129203Scognet    if ( aExp == 0x7FF ) {
2583129203Scognet        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
2584129203Scognet        return packFloat128( aSign, 0x7FFF, 0, 0 );
2585129203Scognet    }
2586129203Scognet    if ( aExp == 0 ) {
2587129203Scognet        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2588129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2589129203Scognet        --aExp;
2590129203Scognet    }
2591129203Scognet    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2592129203Scognet    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2593129203Scognet
2594129203Scognet}
2595129203Scognet
2596129203Scognet#endif
2597129203Scognet
2598129203Scognet#ifndef SOFTFLOAT_FOR_GCC
2599129203Scognet/*
2600129203Scognet-------------------------------------------------------------------------------
2601129203ScognetRounds the double-precision floating-point value `a' to an integer, and
2602129203Scognetreturns the result as a double-precision floating-point value.  The
2603129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
2604129203ScognetFloating-Point Arithmetic.
2605129203Scognet-------------------------------------------------------------------------------
2606129203Scognet*/
2607129203Scognetfloat64 float64_round_to_int( float64 a )
2608129203Scognet{
2609129203Scognet    flag aSign;
2610129203Scognet    int16 aExp;
2611129203Scognet    bits64 lastBitMask, roundBitsMask;
2612129203Scognet    int8 roundingMode;
2613129203Scognet    float64 z;
2614129203Scognet
2615129203Scognet    aExp = extractFloat64Exp( a );
2616129203Scognet    if ( 0x433 <= aExp ) {
2617129203Scognet        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2618129203Scognet            return propagateFloat64NaN( a, a );
2619129203Scognet        }
2620129203Scognet        return a;
2621129203Scognet    }
2622129203Scognet    if ( aExp < 0x3FF ) {
2623129203Scognet        if ( (bits64) ( a<<1 ) == 0 ) return a;
2624129203Scognet        float_exception_flags |= float_flag_inexact;
2625129203Scognet        aSign = extractFloat64Sign( a );
2626129203Scognet        switch ( float_rounding_mode ) {
2627129203Scognet         case float_round_nearest_even:
2628129203Scognet            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2629129203Scognet                return packFloat64( aSign, 0x3FF, 0 );
2630129203Scognet            }
2631129203Scognet            break;
2632129203Scognet	 case float_round_to_zero:
2633129203Scognet	    break;
2634129203Scognet         case float_round_down:
2635129203Scognet            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2636129203Scognet         case float_round_up:
2637129203Scognet            return
2638129203Scognet            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2639129203Scognet        }
2640129203Scognet        return packFloat64( aSign, 0, 0 );
2641129203Scognet    }
2642129203Scognet    lastBitMask = 1;
2643129203Scognet    lastBitMask <<= 0x433 - aExp;
2644129203Scognet    roundBitsMask = lastBitMask - 1;
2645129203Scognet    z = a;
2646129203Scognet    roundingMode = float_rounding_mode;
2647129203Scognet    if ( roundingMode == float_round_nearest_even ) {
2648129203Scognet        z += lastBitMask>>1;
2649129203Scognet        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2650129203Scognet    }
2651129203Scognet    else if ( roundingMode != float_round_to_zero ) {
2652129203Scognet        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2653129203Scognet            z += roundBitsMask;
2654129203Scognet        }
2655129203Scognet    }
2656129203Scognet    z &= ~ roundBitsMask;
2657129203Scognet    if ( z != a ) float_exception_flags |= float_flag_inexact;
2658129203Scognet    return z;
2659129203Scognet
2660129203Scognet}
2661129203Scognet#endif
2662129203Scognet
2663129203Scognet/*
2664129203Scognet-------------------------------------------------------------------------------
2665129203ScognetReturns the result of adding the absolute values of the double-precision
2666129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2667129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
2668129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
2669129203ScognetFloating-Point Arithmetic.
2670129203Scognet-------------------------------------------------------------------------------
2671129203Scognet*/
2672129203Scognetstatic float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2673129203Scognet{
2674129203Scognet    int16 aExp, bExp, zExp;
2675129203Scognet    bits64 aSig, bSig, zSig;
2676129203Scognet    int16 expDiff;
2677129203Scognet
2678129203Scognet    aSig = extractFloat64Frac( a );
2679129203Scognet    aExp = extractFloat64Exp( a );
2680129203Scognet    bSig = extractFloat64Frac( b );
2681129203Scognet    bExp = extractFloat64Exp( b );
2682129203Scognet    expDiff = aExp - bExp;
2683129203Scognet    aSig <<= 9;
2684129203Scognet    bSig <<= 9;
2685129203Scognet    if ( 0 < expDiff ) {
2686129203Scognet        if ( aExp == 0x7FF ) {
2687129203Scognet            if ( aSig ) return propagateFloat64NaN( a, b );
2688129203Scognet            return a;
2689129203Scognet        }
2690129203Scognet        if ( bExp == 0 ) {
2691129203Scognet            --expDiff;
2692129203Scognet        }
2693129203Scognet        else {
2694129203Scognet            bSig |= LIT64( 0x2000000000000000 );
2695129203Scognet        }
2696129203Scognet        shift64RightJamming( bSig, expDiff, &bSig );
2697129203Scognet        zExp = aExp;
2698129203Scognet    }
2699129203Scognet    else if ( expDiff < 0 ) {
2700129203Scognet        if ( bExp == 0x7FF ) {
2701129203Scognet            if ( bSig ) return propagateFloat64NaN( a, b );
2702129203Scognet            return packFloat64( zSign, 0x7FF, 0 );
2703129203Scognet        }
2704129203Scognet        if ( aExp == 0 ) {
2705129203Scognet            ++expDiff;
2706129203Scognet        }
2707129203Scognet        else {
2708129203Scognet            aSig |= LIT64( 0x2000000000000000 );
2709129203Scognet        }
2710129203Scognet        shift64RightJamming( aSig, - expDiff, &aSig );
2711129203Scognet        zExp = bExp;
2712129203Scognet    }
2713129203Scognet    else {
2714129203Scognet        if ( aExp == 0x7FF ) {
2715129203Scognet            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2716129203Scognet            return a;
2717129203Scognet        }
2718129203Scognet        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2719129203Scognet        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2720129203Scognet        zExp = aExp;
2721129203Scognet        goto roundAndPack;
2722129203Scognet    }
2723129203Scognet    aSig |= LIT64( 0x2000000000000000 );
2724129203Scognet    zSig = ( aSig + bSig )<<1;
2725129203Scognet    --zExp;
2726129203Scognet    if ( (sbits64) zSig < 0 ) {
2727129203Scognet        zSig = aSig + bSig;
2728129203Scognet        ++zExp;
2729129203Scognet    }
2730129203Scognet roundAndPack:
2731129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig );
2732129203Scognet
2733129203Scognet}
2734129203Scognet
2735129203Scognet/*
2736129203Scognet-------------------------------------------------------------------------------
2737129203ScognetReturns the result of subtracting the absolute values of the double-
2738129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
2739129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
2740129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
2741129203ScognetStandard for Binary Floating-Point Arithmetic.
2742129203Scognet-------------------------------------------------------------------------------
2743129203Scognet*/
2744129203Scognetstatic float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2745129203Scognet{
2746129203Scognet    int16 aExp, bExp, zExp;
2747129203Scognet    bits64 aSig, bSig, zSig;
2748129203Scognet    int16 expDiff;
2749129203Scognet
2750129203Scognet    aSig = extractFloat64Frac( a );
2751129203Scognet    aExp = extractFloat64Exp( a );
2752129203Scognet    bSig = extractFloat64Frac( b );
2753129203Scognet    bExp = extractFloat64Exp( b );
2754129203Scognet    expDiff = aExp - bExp;
2755129203Scognet    aSig <<= 10;
2756129203Scognet    bSig <<= 10;
2757129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
2758129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
2759129203Scognet    if ( aExp == 0x7FF ) {
2760129203Scognet        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2761129203Scognet        float_raise( float_flag_invalid );
2762129203Scognet        return float64_default_nan;
2763129203Scognet    }
2764129203Scognet    if ( aExp == 0 ) {
2765129203Scognet        aExp = 1;
2766129203Scognet        bExp = 1;
2767129203Scognet    }
2768129203Scognet    if ( bSig < aSig ) goto aBigger;
2769129203Scognet    if ( aSig < bSig ) goto bBigger;
2770129203Scognet    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
2771129203Scognet bExpBigger:
2772129203Scognet    if ( bExp == 0x7FF ) {
2773129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
2774129203Scognet        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2775129203Scognet    }
2776129203Scognet    if ( aExp == 0 ) {
2777129203Scognet        ++expDiff;
2778129203Scognet    }
2779129203Scognet    else {
2780129203Scognet        aSig |= LIT64( 0x4000000000000000 );
2781129203Scognet    }
2782129203Scognet    shift64RightJamming( aSig, - expDiff, &aSig );
2783129203Scognet    bSig |= LIT64( 0x4000000000000000 );
2784129203Scognet bBigger:
2785129203Scognet    zSig = bSig - aSig;
2786129203Scognet    zExp = bExp;
2787129203Scognet    zSign ^= 1;
2788129203Scognet    goto normalizeRoundAndPack;
2789129203Scognet aExpBigger:
2790129203Scognet    if ( aExp == 0x7FF ) {
2791129203Scognet        if ( aSig ) return propagateFloat64NaN( a, b );
2792129203Scognet        return a;
2793129203Scognet    }
2794129203Scognet    if ( bExp == 0 ) {
2795129203Scognet        --expDiff;
2796129203Scognet    }
2797129203Scognet    else {
2798129203Scognet        bSig |= LIT64( 0x4000000000000000 );
2799129203Scognet    }
2800129203Scognet    shift64RightJamming( bSig, expDiff, &bSig );
2801129203Scognet    aSig |= LIT64( 0x4000000000000000 );
2802129203Scognet aBigger:
2803129203Scognet    zSig = aSig - bSig;
2804129203Scognet    zExp = aExp;
2805129203Scognet normalizeRoundAndPack:
2806129203Scognet    --zExp;
2807129203Scognet    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2808129203Scognet
2809129203Scognet}
2810129203Scognet
2811129203Scognet/*
2812129203Scognet-------------------------------------------------------------------------------
2813129203ScognetReturns the result of adding the double-precision floating-point values `a'
2814129203Scognetand `b'.  The operation is performed according to the IEC/IEEE Standard for
2815129203ScognetBinary Floating-Point Arithmetic.
2816129203Scognet-------------------------------------------------------------------------------
2817129203Scognet*/
2818129203Scognetfloat64 float64_add( float64 a, float64 b )
2819129203Scognet{
2820129203Scognet    flag aSign, bSign;
2821129203Scognet
2822129203Scognet    aSign = extractFloat64Sign( a );
2823129203Scognet    bSign = extractFloat64Sign( b );
2824129203Scognet    if ( aSign == bSign ) {
2825129203Scognet        return addFloat64Sigs( a, b, aSign );
2826129203Scognet    }
2827129203Scognet    else {
2828129203Scognet        return subFloat64Sigs( a, b, aSign );
2829129203Scognet    }
2830129203Scognet
2831129203Scognet}
2832129203Scognet
2833129203Scognet/*
2834129203Scognet-------------------------------------------------------------------------------
2835129203ScognetReturns the result of subtracting the double-precision floating-point values
2836129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2837129203Scognetfor Binary Floating-Point Arithmetic.
2838129203Scognet-------------------------------------------------------------------------------
2839129203Scognet*/
2840129203Scognetfloat64 float64_sub( float64 a, float64 b )
2841129203Scognet{
2842129203Scognet    flag aSign, bSign;
2843129203Scognet
2844129203Scognet    aSign = extractFloat64Sign( a );
2845129203Scognet    bSign = extractFloat64Sign( b );
2846129203Scognet    if ( aSign == bSign ) {
2847129203Scognet        return subFloat64Sigs( a, b, aSign );
2848129203Scognet    }
2849129203Scognet    else {
2850129203Scognet        return addFloat64Sigs( a, b, aSign );
2851129203Scognet    }
2852129203Scognet
2853129203Scognet}
2854129203Scognet
2855129203Scognet/*
2856129203Scognet-------------------------------------------------------------------------------
2857129203ScognetReturns the result of multiplying the double-precision floating-point values
2858129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2859129203Scognetfor Binary Floating-Point Arithmetic.
2860129203Scognet-------------------------------------------------------------------------------
2861129203Scognet*/
2862129203Scognetfloat64 float64_mul( float64 a, float64 b )
2863129203Scognet{
2864129203Scognet    flag aSign, bSign, zSign;
2865129203Scognet    int16 aExp, bExp, zExp;
2866129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
2867129203Scognet
2868129203Scognet    aSig = extractFloat64Frac( a );
2869129203Scognet    aExp = extractFloat64Exp( a );
2870129203Scognet    aSign = extractFloat64Sign( a );
2871129203Scognet    bSig = extractFloat64Frac( b );
2872129203Scognet    bExp = extractFloat64Exp( b );
2873129203Scognet    bSign = extractFloat64Sign( b );
2874129203Scognet    zSign = aSign ^ bSign;
2875129203Scognet    if ( aExp == 0x7FF ) {
2876129203Scognet        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2877129203Scognet            return propagateFloat64NaN( a, b );
2878129203Scognet        }
2879129203Scognet        if ( ( bExp | bSig ) == 0 ) {
2880129203Scognet            float_raise( float_flag_invalid );
2881129203Scognet            return float64_default_nan;
2882129203Scognet        }
2883129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
2884129203Scognet    }
2885129203Scognet    if ( bExp == 0x7FF ) {
2886129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
2887129203Scognet        if ( ( aExp | aSig ) == 0 ) {
2888129203Scognet            float_raise( float_flag_invalid );
2889129203Scognet            return float64_default_nan;
2890129203Scognet        }
2891129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
2892129203Scognet    }
2893129203Scognet    if ( aExp == 0 ) {
2894129203Scognet        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2895129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2896129203Scognet    }
2897129203Scognet    if ( bExp == 0 ) {
2898129203Scognet        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2899129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2900129203Scognet    }
2901129203Scognet    zExp = aExp + bExp - 0x3FF;
2902129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2903129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2904129203Scognet    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2905129203Scognet    zSig0 |= ( zSig1 != 0 );
2906129203Scognet    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2907129203Scognet        zSig0 <<= 1;
2908129203Scognet        --zExp;
2909129203Scognet    }
2910129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig0 );
2911129203Scognet
2912129203Scognet}
2913129203Scognet
2914129203Scognet/*
2915129203Scognet-------------------------------------------------------------------------------
2916129203ScognetReturns the result of dividing the double-precision floating-point value `a'
2917129203Scognetby the corresponding value `b'.  The operation is performed according to
2918129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2919129203Scognet-------------------------------------------------------------------------------
2920129203Scognet*/
2921129203Scognetfloat64 float64_div( float64 a, float64 b )
2922129203Scognet{
2923129203Scognet    flag aSign, bSign, zSign;
2924129203Scognet    int16 aExp, bExp, zExp;
2925129203Scognet    bits64 aSig, bSig, zSig;
2926129203Scognet    bits64 rem0, rem1;
2927129203Scognet    bits64 term0, term1;
2928129203Scognet
2929129203Scognet    aSig = extractFloat64Frac( a );
2930129203Scognet    aExp = extractFloat64Exp( a );
2931129203Scognet    aSign = extractFloat64Sign( a );
2932129203Scognet    bSig = extractFloat64Frac( b );
2933129203Scognet    bExp = extractFloat64Exp( b );
2934129203Scognet    bSign = extractFloat64Sign( b );
2935129203Scognet    zSign = aSign ^ bSign;
2936129203Scognet    if ( aExp == 0x7FF ) {
2937129203Scognet        if ( aSig ) return propagateFloat64NaN( a, b );
2938129203Scognet        if ( bExp == 0x7FF ) {
2939129203Scognet            if ( bSig ) return propagateFloat64NaN( a, b );
2940129203Scognet            float_raise( float_flag_invalid );
2941129203Scognet            return float64_default_nan;
2942129203Scognet        }
2943129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
2944129203Scognet    }
2945129203Scognet    if ( bExp == 0x7FF ) {
2946129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
2947129203Scognet        return packFloat64( zSign, 0, 0 );
2948129203Scognet    }
2949129203Scognet    if ( bExp == 0 ) {
2950129203Scognet        if ( bSig == 0 ) {
2951129203Scognet            if ( ( aExp | aSig ) == 0 ) {
2952129203Scognet                float_raise( float_flag_invalid );
2953129203Scognet                return float64_default_nan;
2954129203Scognet            }
2955129203Scognet            float_raise( float_flag_divbyzero );
2956129203Scognet            return packFloat64( zSign, 0x7FF, 0 );
2957129203Scognet        }
2958129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2959129203Scognet    }
2960129203Scognet    if ( aExp == 0 ) {
2961129203Scognet        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2962129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2963129203Scognet    }
2964129203Scognet    zExp = aExp - bExp + 0x3FD;
2965129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2966129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2967129203Scognet    if ( bSig <= ( aSig + aSig ) ) {
2968129203Scognet        aSig >>= 1;
2969129203Scognet        ++zExp;
2970129203Scognet    }
2971129203Scognet    zSig = estimateDiv128To64( aSig, 0, bSig );
2972129203Scognet    if ( ( zSig & 0x1FF ) <= 2 ) {
2973129203Scognet        mul64To128( bSig, zSig, &term0, &term1 );
2974129203Scognet        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2975129203Scognet        while ( (sbits64) rem0 < 0 ) {
2976129203Scognet            --zSig;
2977129203Scognet            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2978129203Scognet        }
2979129203Scognet        zSig |= ( rem1 != 0 );
2980129203Scognet    }
2981129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig );
2982129203Scognet
2983129203Scognet}
2984129203Scognet
2985129203Scognet#ifndef SOFTFLOAT_FOR_GCC
2986129203Scognet/*
2987129203Scognet-------------------------------------------------------------------------------
2988129203ScognetReturns the remainder of the double-precision floating-point value `a'
2989129203Scognetwith respect to the corresponding value `b'.  The operation is performed
2990129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2991129203Scognet-------------------------------------------------------------------------------
2992129203Scognet*/
2993129203Scognetfloat64 float64_rem( float64 a, float64 b )
2994129203Scognet{
2995129203Scognet    flag aSign, bSign, zSign;
2996129203Scognet    int16 aExp, bExp, expDiff;
2997129203Scognet    bits64 aSig, bSig;
2998129203Scognet    bits64 q, alternateASig;
2999129203Scognet    sbits64 sigMean;
3000129203Scognet
3001129203Scognet    aSig = extractFloat64Frac( a );
3002129203Scognet    aExp = extractFloat64Exp( a );
3003129203Scognet    aSign = extractFloat64Sign( a );
3004129203Scognet    bSig = extractFloat64Frac( b );
3005129203Scognet    bExp = extractFloat64Exp( b );
3006129203Scognet    bSign = extractFloat64Sign( b );
3007129203Scognet    if ( aExp == 0x7FF ) {
3008129203Scognet        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3009129203Scognet            return propagateFloat64NaN( a, b );
3010129203Scognet        }
3011129203Scognet        float_raise( float_flag_invalid );
3012129203Scognet        return float64_default_nan;
3013129203Scognet    }
3014129203Scognet    if ( bExp == 0x7FF ) {
3015129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
3016129203Scognet        return a;
3017129203Scognet    }
3018129203Scognet    if ( bExp == 0 ) {
3019129203Scognet        if ( bSig == 0 ) {
3020129203Scognet            float_raise( float_flag_invalid );
3021129203Scognet            return float64_default_nan;
3022129203Scognet        }
3023129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3024129203Scognet    }
3025129203Scognet    if ( aExp == 0 ) {
3026129203Scognet        if ( aSig == 0 ) return a;
3027129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3028129203Scognet    }
3029129203Scognet    expDiff = aExp - bExp;
3030129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3031129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3032129203Scognet    if ( expDiff < 0 ) {
3033129203Scognet        if ( expDiff < -1 ) return a;
3034129203Scognet        aSig >>= 1;
3035129203Scognet    }
3036129203Scognet    q = ( bSig <= aSig );
3037129203Scognet    if ( q ) aSig -= bSig;
3038129203Scognet    expDiff -= 64;
3039129203Scognet    while ( 0 < expDiff ) {
3040129203Scognet        q = estimateDiv128To64( aSig, 0, bSig );
3041129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
3042129203Scognet        aSig = - ( ( bSig>>2 ) * q );
3043129203Scognet        expDiff -= 62;
3044129203Scognet    }
3045129203Scognet    expDiff += 64;
3046129203Scognet    if ( 0 < expDiff ) {
3047129203Scognet        q = estimateDiv128To64( aSig, 0, bSig );
3048129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
3049129203Scognet        q >>= 64 - expDiff;
3050129203Scognet        bSig >>= 2;
3051129203Scognet        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3052129203Scognet    }
3053129203Scognet    else {
3054129203Scognet        aSig >>= 2;
3055129203Scognet        bSig >>= 2;
3056129203Scognet    }
3057129203Scognet    do {
3058129203Scognet        alternateASig = aSig;
3059129203Scognet        ++q;
3060129203Scognet        aSig -= bSig;
3061129203Scognet    } while ( 0 <= (sbits64) aSig );
3062129203Scognet    sigMean = aSig + alternateASig;
3063129203Scognet    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3064129203Scognet        aSig = alternateASig;
3065129203Scognet    }
3066129203Scognet    zSign = ( (sbits64) aSig < 0 );
3067129203Scognet    if ( zSign ) aSig = - aSig;
3068129203Scognet    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3069129203Scognet
3070129203Scognet}
3071129203Scognet
3072129203Scognet/*
3073129203Scognet-------------------------------------------------------------------------------
3074129203ScognetReturns the square root of the double-precision floating-point value `a'.
3075129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
3076129203ScognetFloating-Point Arithmetic.
3077129203Scognet-------------------------------------------------------------------------------
3078129203Scognet*/
3079129203Scognetfloat64 float64_sqrt( float64 a )
3080129203Scognet{
3081129203Scognet    flag aSign;
3082129203Scognet    int16 aExp, zExp;
3083129203Scognet    bits64 aSig, zSig, doubleZSig;
3084129203Scognet    bits64 rem0, rem1, term0, term1;
3085129203Scognet
3086129203Scognet    aSig = extractFloat64Frac( a );
3087129203Scognet    aExp = extractFloat64Exp( a );
3088129203Scognet    aSign = extractFloat64Sign( a );
3089129203Scognet    if ( aExp == 0x7FF ) {
3090129203Scognet        if ( aSig ) return propagateFloat64NaN( a, a );
3091129203Scognet        if ( ! aSign ) return a;
3092129203Scognet        float_raise( float_flag_invalid );
3093129203Scognet        return float64_default_nan;
3094129203Scognet    }
3095129203Scognet    if ( aSign ) {
3096129203Scognet        if ( ( aExp | aSig ) == 0 ) return a;
3097129203Scognet        float_raise( float_flag_invalid );
3098129203Scognet        return float64_default_nan;
3099129203Scognet    }
3100129203Scognet    if ( aExp == 0 ) {
3101129203Scognet        if ( aSig == 0 ) return 0;
3102129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3103129203Scognet    }
3104129203Scognet    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3105129203Scognet    aSig |= LIT64( 0x0010000000000000 );
3106129203Scognet    zSig = estimateSqrt32( aExp, aSig>>21 );
3107129203Scognet    aSig <<= 9 - ( aExp & 1 );
3108129203Scognet    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3109129203Scognet    if ( ( zSig & 0x1FF ) <= 5 ) {
3110129203Scognet        doubleZSig = zSig<<1;
3111129203Scognet        mul64To128( zSig, zSig, &term0, &term1 );
3112129203Scognet        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3113129203Scognet        while ( (sbits64) rem0 < 0 ) {
3114129203Scognet            --zSig;
3115129203Scognet            doubleZSig -= 2;
3116129203Scognet            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3117129203Scognet        }
3118129203Scognet        zSig |= ( ( rem0 | rem1 ) != 0 );
3119129203Scognet    }
3120129203Scognet    return roundAndPackFloat64( 0, zExp, zSig );
3121129203Scognet
3122129203Scognet}
3123129203Scognet#endif
3124129203Scognet
3125129203Scognet/*
3126129203Scognet-------------------------------------------------------------------------------
3127129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the
3128129203Scognetcorresponding value `b', and 0 otherwise.  The comparison is performed
3129129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3130129203Scognet-------------------------------------------------------------------------------
3131129203Scognet*/
3132129203Scognetflag float64_eq( float64 a, float64 b )
3133129203Scognet{
3134129203Scognet
3135129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3136129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3137129203Scognet       ) {
3138129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3139129203Scognet            float_raise( float_flag_invalid );
3140129203Scognet        }
3141129203Scognet        return 0;
3142129203Scognet    }
3143129203Scognet    return ( a == b ) ||
3144129203Scognet	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3145129203Scognet
3146129203Scognet}
3147129203Scognet
3148129203Scognet/*
3149129203Scognet-------------------------------------------------------------------------------
3150129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or
3151129203Scognetequal to the corresponding value `b', and 0 otherwise.  The comparison is
3152129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
3153129203ScognetArithmetic.
3154129203Scognet-------------------------------------------------------------------------------
3155129203Scognet*/
3156129203Scognetflag float64_le( float64 a, float64 b )
3157129203Scognet{
3158129203Scognet    flag aSign, bSign;
3159129203Scognet
3160129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3161129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3162129203Scognet       ) {
3163129203Scognet        float_raise( float_flag_invalid );
3164129203Scognet        return 0;
3165129203Scognet    }
3166129203Scognet    aSign = extractFloat64Sign( a );
3167129203Scognet    bSign = extractFloat64Sign( b );
3168129203Scognet    if ( aSign != bSign )
3169129203Scognet	return aSign ||
3170129203Scognet	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3171129203Scognet	      0 );
3172129203Scognet    return ( a == b ) ||
3173129203Scognet	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3174129203Scognet
3175129203Scognet}
3176129203Scognet
3177129203Scognet/*
3178129203Scognet-------------------------------------------------------------------------------
3179129203ScognetReturns 1 if the double-precision floating-point value `a' is less than
3180129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
3181129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3182129203Scognet-------------------------------------------------------------------------------
3183129203Scognet*/
3184129203Scognetflag float64_lt( float64 a, float64 b )
3185129203Scognet{
3186129203Scognet    flag aSign, bSign;
3187129203Scognet
3188129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3189129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3190129203Scognet       ) {
3191129203Scognet        float_raise( float_flag_invalid );
3192129203Scognet        return 0;
3193129203Scognet    }
3194129203Scognet    aSign = extractFloat64Sign( a );
3195129203Scognet    bSign = extractFloat64Sign( b );
3196129203Scognet    if ( aSign != bSign )
3197129203Scognet	return aSign &&
3198129203Scognet	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3199129203Scognet	      0 );
3200129203Scognet    return ( a != b ) &&
3201129203Scognet	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3202129203Scognet
3203129203Scognet}
3204129203Scognet
3205129203Scognet#ifndef SOFTFLOAT_FOR_GCC
3206129203Scognet/*
3207129203Scognet-------------------------------------------------------------------------------
3208129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the
3209129203Scognetcorresponding value `b', and 0 otherwise.  The invalid exception is raised
3210129203Scognetif either operand is a NaN.  Otherwise, the comparison is performed
3211129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3212129203Scognet-------------------------------------------------------------------------------
3213129203Scognet*/
3214129203Scognetflag float64_eq_signaling( float64 a, float64 b )
3215129203Scognet{
3216129203Scognet
3217129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3218129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3219129203Scognet       ) {
3220129203Scognet        float_raise( float_flag_invalid );
3221129203Scognet        return 0;
3222129203Scognet    }
3223129203Scognet    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3224129203Scognet
3225129203Scognet}
3226129203Scognet
3227129203Scognet/*
3228129203Scognet-------------------------------------------------------------------------------
3229129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or
3230129203Scognetequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3231129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
3232129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
3233129203Scognet-------------------------------------------------------------------------------
3234129203Scognet*/
3235129203Scognetflag float64_le_quiet( float64 a, float64 b )
3236129203Scognet{
3237129203Scognet    flag aSign, bSign;
3238129203Scognet
3239129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3240129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3241129203Scognet       ) {
3242129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3243129203Scognet            float_raise( float_flag_invalid );
3244129203Scognet        }
3245129203Scognet        return 0;
3246129203Scognet    }
3247129203Scognet    aSign = extractFloat64Sign( a );
3248129203Scognet    bSign = extractFloat64Sign( b );
3249129203Scognet    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3250129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
3251129203Scognet
3252129203Scognet}
3253129203Scognet
3254129203Scognet/*
3255129203Scognet-------------------------------------------------------------------------------
3256129203ScognetReturns 1 if the double-precision floating-point value `a' is less than
3257129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3258129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
3259129203ScognetStandard for Binary Floating-Point Arithmetic.
3260129203Scognet-------------------------------------------------------------------------------
3261129203Scognet*/
3262129203Scognetflag float64_lt_quiet( float64 a, float64 b )
3263129203Scognet{
3264129203Scognet    flag aSign, bSign;
3265129203Scognet
3266129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3267129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3268129203Scognet       ) {
3269129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3270129203Scognet            float_raise( float_flag_invalid );
3271129203Scognet        }
3272129203Scognet        return 0;
3273129203Scognet    }
3274129203Scognet    aSign = extractFloat64Sign( a );
3275129203Scognet    bSign = extractFloat64Sign( b );
3276129203Scognet    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3277129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
3278129203Scognet
3279129203Scognet}
3280129203Scognet#endif
3281129203Scognet
3282129203Scognet#ifdef FLOATX80
3283129203Scognet
3284129203Scognet/*
3285129203Scognet-------------------------------------------------------------------------------
3286129203ScognetReturns the result of converting the extended double-precision floating-
3287129203Scognetpoint value `a' to the 32-bit two's complement integer format.  The
3288129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3289129203ScognetFloating-Point Arithmetic---which means in particular that the conversion
3290129203Scognetis rounded according to the current rounding mode.  If `a' is a NaN, the
3291129203Scognetlargest positive integer is returned.  Otherwise, if the conversion
3292129203Scognetoverflows, the largest integer with the same sign as `a' is returned.
3293129203Scognet-------------------------------------------------------------------------------
3294129203Scognet*/
3295129203Scognetint32 floatx80_to_int32( floatx80 a )
3296129203Scognet{
3297129203Scognet    flag aSign;
3298129203Scognet    int32 aExp, shiftCount;
3299129203Scognet    bits64 aSig;
3300129203Scognet
3301129203Scognet    aSig = extractFloatx80Frac( a );
3302129203Scognet    aExp = extractFloatx80Exp( a );
3303129203Scognet    aSign = extractFloatx80Sign( a );
3304129203Scognet    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3305129203Scognet    shiftCount = 0x4037 - aExp;
3306129203Scognet    if ( shiftCount <= 0 ) shiftCount = 1;
3307129203Scognet    shift64RightJamming( aSig, shiftCount, &aSig );
3308129203Scognet    return roundAndPackInt32( aSign, aSig );
3309129203Scognet
3310129203Scognet}
3311129203Scognet
3312129203Scognet/*
3313129203Scognet-------------------------------------------------------------------------------
3314129203ScognetReturns the result of converting the extended double-precision floating-
3315129203Scognetpoint value `a' to the 32-bit two's complement integer format.  The
3316129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3317129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded
3318129203Scognettoward zero.  If `a' is a NaN, the largest positive integer is returned.
3319129203ScognetOtherwise, if the conversion overflows, the largest integer with the same
3320129203Scognetsign as `a' is returned.
3321129203Scognet-------------------------------------------------------------------------------
3322129203Scognet*/
3323129203Scognetint32 floatx80_to_int32_round_to_zero( floatx80 a )
3324129203Scognet{
3325129203Scognet    flag aSign;
3326129203Scognet    int32 aExp, shiftCount;
3327129203Scognet    bits64 aSig, savedASig;
3328129203Scognet    int32 z;
3329129203Scognet
3330129203Scognet    aSig = extractFloatx80Frac( a );
3331129203Scognet    aExp = extractFloatx80Exp( a );
3332129203Scognet    aSign = extractFloatx80Sign( a );
3333129203Scognet    if ( 0x401E < aExp ) {
3334129203Scognet        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3335129203Scognet        goto invalid;
3336129203Scognet    }
3337129203Scognet    else if ( aExp < 0x3FFF ) {
3338129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
3339129203Scognet        return 0;
3340129203Scognet    }
3341129203Scognet    shiftCount = 0x403E - aExp;
3342129203Scognet    savedASig = aSig;
3343129203Scognet    aSig >>= shiftCount;
3344129203Scognet    z = aSig;
3345129203Scognet    if ( aSign ) z = - z;
3346129203Scognet    if ( ( z < 0 ) ^ aSign ) {
3347129203Scognet invalid:
3348129203Scognet        float_raise( float_flag_invalid );
3349129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3350129203Scognet    }
3351129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
3352129203Scognet        float_exception_flags |= float_flag_inexact;
3353129203Scognet    }
3354129203Scognet    return z;
3355129203Scognet
3356129203Scognet}
3357129203Scognet
3358129203Scognet/*
3359129203Scognet-------------------------------------------------------------------------------
3360129203ScognetReturns the result of converting the extended double-precision floating-
3361129203Scognetpoint value `a' to the 64-bit two's complement integer format.  The
3362129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3363129203ScognetFloating-Point Arithmetic---which means in particular that the conversion
3364129203Scognetis rounded according to the current rounding mode.  If `a' is a NaN,
3365129203Scognetthe largest positive integer is returned.  Otherwise, if the conversion
3366129203Scognetoverflows, the largest integer with the same sign as `a' is returned.
3367129203Scognet-------------------------------------------------------------------------------
3368129203Scognet*/
3369129203Scognetint64 floatx80_to_int64( floatx80 a )
3370129203Scognet{
3371129203Scognet    flag aSign;
3372129203Scognet    int32 aExp, shiftCount;
3373129203Scognet    bits64 aSig, aSigExtra;
3374129203Scognet
3375129203Scognet    aSig = extractFloatx80Frac( a );
3376129203Scognet    aExp = extractFloatx80Exp( a );
3377129203Scognet    aSign = extractFloatx80Sign( a );
3378129203Scognet    shiftCount = 0x403E - aExp;
3379129203Scognet    if ( shiftCount <= 0 ) {
3380129203Scognet        if ( shiftCount ) {
3381129203Scognet            float_raise( float_flag_invalid );
3382129203Scognet            if (    ! aSign
3383129203Scognet                 || (    ( aExp == 0x7FFF )
3384129203Scognet                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3385129203Scognet               ) {
3386129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
3387129203Scognet            }
3388129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
3389129203Scognet        }
3390129203Scognet        aSigExtra = 0;
3391129203Scognet    }
3392129203Scognet    else {
3393129203Scognet        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3394129203Scognet    }
3395129203Scognet    return roundAndPackInt64( aSign, aSig, aSigExtra );
3396129203Scognet
3397129203Scognet}
3398129203Scognet
3399129203Scognet/*
3400129203Scognet-------------------------------------------------------------------------------
3401129203ScognetReturns the result of converting the extended double-precision floating-
3402129203Scognetpoint value `a' to the 64-bit two's complement integer format.  The
3403129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3404129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded
3405129203Scognettoward zero.  If `a' is a NaN, the largest positive integer is returned.
3406129203ScognetOtherwise, if the conversion overflows, the largest integer with the same
3407129203Scognetsign as `a' is returned.
3408129203Scognet-------------------------------------------------------------------------------
3409129203Scognet*/
3410129203Scognetint64 floatx80_to_int64_round_to_zero( floatx80 a )
3411129203Scognet{
3412129203Scognet    flag aSign;
3413129203Scognet    int32 aExp, shiftCount;
3414129203Scognet    bits64 aSig;
3415129203Scognet    int64 z;
3416129203Scognet
3417129203Scognet    aSig = extractFloatx80Frac( a );
3418129203Scognet    aExp = extractFloatx80Exp( a );
3419129203Scognet    aSign = extractFloatx80Sign( a );
3420129203Scognet    shiftCount = aExp - 0x403E;
3421129203Scognet    if ( 0 <= shiftCount ) {
3422129203Scognet        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3423129203Scognet        if ( ( a.high != 0xC03E ) || aSig ) {
3424129203Scognet            float_raise( float_flag_invalid );
3425129203Scognet            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3426129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
3427129203Scognet            }
3428129203Scognet        }
3429129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
3430129203Scognet    }
3431129203Scognet    else if ( aExp < 0x3FFF ) {
3432129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
3433129203Scognet        return 0;
3434129203Scognet    }
3435129203Scognet    z = aSig>>( - shiftCount );
3436129203Scognet    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3437129203Scognet        float_exception_flags |= float_flag_inexact;
3438129203Scognet    }
3439129203Scognet    if ( aSign ) z = - z;
3440129203Scognet    return z;
3441129203Scognet
3442129203Scognet}
3443129203Scognet
3444129203Scognet/*
3445129203Scognet-------------------------------------------------------------------------------
3446129203ScognetReturns the result of converting the extended double-precision floating-
3447129203Scognetpoint value `a' to the single-precision floating-point format.  The
3448129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3449129203ScognetFloating-Point Arithmetic.
3450129203Scognet-------------------------------------------------------------------------------
3451129203Scognet*/
3452129203Scognetfloat32 floatx80_to_float32( floatx80 a )
3453129203Scognet{
3454129203Scognet    flag aSign;
3455129203Scognet    int32 aExp;
3456129203Scognet    bits64 aSig;
3457129203Scognet
3458129203Scognet    aSig = extractFloatx80Frac( a );
3459129203Scognet    aExp = extractFloatx80Exp( a );
3460129203Scognet    aSign = extractFloatx80Sign( a );
3461129203Scognet    if ( aExp == 0x7FFF ) {
3462129203Scognet        if ( (bits64) ( aSig<<1 ) ) {
3463129203Scognet            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3464129203Scognet        }
3465129203Scognet        return packFloat32( aSign, 0xFF, 0 );
3466129203Scognet    }
3467129203Scognet    shift64RightJamming( aSig, 33, &aSig );
3468129203Scognet    if ( aExp || aSig ) aExp -= 0x3F81;
3469129203Scognet    return roundAndPackFloat32( aSign, aExp, aSig );
3470129203Scognet
3471129203Scognet}
3472129203Scognet
3473129203Scognet/*
3474129203Scognet-------------------------------------------------------------------------------
3475129203ScognetReturns the result of converting the extended double-precision floating-
3476129203Scognetpoint value `a' to the double-precision floating-point format.  The
3477129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3478129203ScognetFloating-Point Arithmetic.
3479129203Scognet-------------------------------------------------------------------------------
3480129203Scognet*/
3481129203Scognetfloat64 floatx80_to_float64( floatx80 a )
3482129203Scognet{
3483129203Scognet    flag aSign;
3484129203Scognet    int32 aExp;
3485129203Scognet    bits64 aSig, zSig;
3486129203Scognet
3487129203Scognet    aSig = extractFloatx80Frac( a );
3488129203Scognet    aExp = extractFloatx80Exp( a );
3489129203Scognet    aSign = extractFloatx80Sign( a );
3490129203Scognet    if ( aExp == 0x7FFF ) {
3491129203Scognet        if ( (bits64) ( aSig<<1 ) ) {
3492129203Scognet            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3493129203Scognet        }
3494129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
3495129203Scognet    }
3496129203Scognet    shift64RightJamming( aSig, 1, &zSig );
3497129203Scognet    if ( aExp || aSig ) aExp -= 0x3C01;
3498129203Scognet    return roundAndPackFloat64( aSign, aExp, zSig );
3499129203Scognet
3500129203Scognet}
3501129203Scognet
3502129203Scognet#ifdef FLOAT128
3503129203Scognet
3504129203Scognet/*
3505129203Scognet-------------------------------------------------------------------------------
3506129203ScognetReturns the result of converting the extended double-precision floating-
3507129203Scognetpoint value `a' to the quadruple-precision floating-point format.  The
3508129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3509129203ScognetFloating-Point Arithmetic.
3510129203Scognet-------------------------------------------------------------------------------
3511129203Scognet*/
3512129203Scognetfloat128 floatx80_to_float128( floatx80 a )
3513129203Scognet{
3514129203Scognet    flag aSign;
3515129203Scognet    int16 aExp;
3516129203Scognet    bits64 aSig, zSig0, zSig1;
3517129203Scognet
3518129203Scognet    aSig = extractFloatx80Frac( a );
3519129203Scognet    aExp = extractFloatx80Exp( a );
3520129203Scognet    aSign = extractFloatx80Sign( a );
3521129203Scognet    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3522129203Scognet        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3523129203Scognet    }
3524129203Scognet    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3525129203Scognet    return packFloat128( aSign, aExp, zSig0, zSig1 );
3526129203Scognet
3527129203Scognet}
3528129203Scognet
3529129203Scognet#endif
3530129203Scognet
3531129203Scognet/*
3532129203Scognet-------------------------------------------------------------------------------
3533129203ScognetRounds the extended double-precision floating-point value `a' to an integer,
3534129203Scognetand returns the result as an extended quadruple-precision floating-point
3535129203Scognetvalue.  The operation is performed according to the IEC/IEEE Standard for
3536129203ScognetBinary Floating-Point Arithmetic.
3537129203Scognet-------------------------------------------------------------------------------
3538129203Scognet*/
3539129203Scognetfloatx80 floatx80_round_to_int( floatx80 a )
3540129203Scognet{
3541129203Scognet    flag aSign;
3542129203Scognet    int32 aExp;
3543129203Scognet    bits64 lastBitMask, roundBitsMask;
3544129203Scognet    int8 roundingMode;
3545129203Scognet    floatx80 z;
3546129203Scognet
3547129203Scognet    aExp = extractFloatx80Exp( a );
3548129203Scognet    if ( 0x403E <= aExp ) {
3549129203Scognet        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3550129203Scognet            return propagateFloatx80NaN( a, a );
3551129203Scognet        }
3552129203Scognet        return a;
3553129203Scognet    }
3554129203Scognet    if ( aExp < 0x3FFF ) {
3555129203Scognet        if (    ( aExp == 0 )
3556129203Scognet             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3557129203Scognet            return a;
3558129203Scognet        }
3559129203Scognet        float_exception_flags |= float_flag_inexact;
3560129203Scognet        aSign = extractFloatx80Sign( a );
3561129203Scognet        switch ( float_rounding_mode ) {
3562129203Scognet         case float_round_nearest_even:
3563129203Scognet            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3564129203Scognet               ) {
3565129203Scognet                return
3566129203Scognet                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3567129203Scognet            }
3568129203Scognet            break;
3569129203Scognet	 case float_round_to_zero:
3570129203Scognet	    break;
3571129203Scognet         case float_round_down:
3572129203Scognet            return
3573129203Scognet                  aSign ?
3574129203Scognet                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3575129203Scognet                : packFloatx80( 0, 0, 0 );
3576129203Scognet         case float_round_up:
3577129203Scognet            return
3578129203Scognet                  aSign ? packFloatx80( 1, 0, 0 )
3579129203Scognet                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3580129203Scognet        }
3581129203Scognet        return packFloatx80( aSign, 0, 0 );
3582129203Scognet    }
3583129203Scognet    lastBitMask = 1;
3584129203Scognet    lastBitMask <<= 0x403E - aExp;
3585129203Scognet    roundBitsMask = lastBitMask - 1;
3586129203Scognet    z = a;
3587129203Scognet    roundingMode = float_rounding_mode;
3588129203Scognet    if ( roundingMode == float_round_nearest_even ) {
3589129203Scognet        z.low += lastBitMask>>1;
3590129203Scognet        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3591129203Scognet    }
3592129203Scognet    else if ( roundingMode != float_round_to_zero ) {
3593129203Scognet        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3594129203Scognet            z.low += roundBitsMask;
3595129203Scognet        }
3596129203Scognet    }
3597129203Scognet    z.low &= ~ roundBitsMask;
3598129203Scognet    if ( z.low == 0 ) {
3599129203Scognet        ++z.high;
3600129203Scognet        z.low = LIT64( 0x8000000000000000 );
3601129203Scognet    }
3602129203Scognet    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
3603129203Scognet    return z;
3604129203Scognet
3605129203Scognet}
3606129203Scognet
3607129203Scognet/*
3608129203Scognet-------------------------------------------------------------------------------
3609129203ScognetReturns the result of adding the absolute values of the extended double-
3610129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3611129203Scognetnegated before being returned.  `zSign' is ignored if the result is a NaN.
3612129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
3613129203ScognetFloating-Point Arithmetic.
3614129203Scognet-------------------------------------------------------------------------------
3615129203Scognet*/
3616129203Scognetstatic floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3617129203Scognet{
3618129203Scognet    int32 aExp, bExp, zExp;
3619129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3620129203Scognet    int32 expDiff;
3621129203Scognet
3622129203Scognet    aSig = extractFloatx80Frac( a );
3623129203Scognet    aExp = extractFloatx80Exp( a );
3624129203Scognet    bSig = extractFloatx80Frac( b );
3625129203Scognet    bExp = extractFloatx80Exp( b );
3626129203Scognet    expDiff = aExp - bExp;
3627129203Scognet    if ( 0 < expDiff ) {
3628129203Scognet        if ( aExp == 0x7FFF ) {
3629129203Scognet            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3630129203Scognet            return a;
3631129203Scognet        }
3632129203Scognet        if ( bExp == 0 ) --expDiff;
3633129203Scognet        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3634129203Scognet        zExp = aExp;
3635129203Scognet    }
3636129203Scognet    else if ( expDiff < 0 ) {
3637129203Scognet        if ( bExp == 0x7FFF ) {
3638129203Scognet            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3639129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3640129203Scognet        }
3641129203Scognet        if ( aExp == 0 ) ++expDiff;
3642129203Scognet        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3643129203Scognet        zExp = bExp;
3644129203Scognet    }
3645129203Scognet    else {
3646129203Scognet        if ( aExp == 0x7FFF ) {
3647129203Scognet            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3648129203Scognet                return propagateFloatx80NaN( a, b );
3649129203Scognet            }
3650129203Scognet            return a;
3651129203Scognet        }
3652129203Scognet        zSig1 = 0;
3653129203Scognet        zSig0 = aSig + bSig;
3654129203Scognet        if ( aExp == 0 ) {
3655129203Scognet            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3656129203Scognet            goto roundAndPack;
3657129203Scognet        }
3658129203Scognet        zExp = aExp;
3659129203Scognet        goto shiftRight1;
3660129203Scognet    }
3661129203Scognet    zSig0 = aSig + bSig;
3662129203Scognet    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3663129203Scognet shiftRight1:
3664129203Scognet    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3665129203Scognet    zSig0 |= LIT64( 0x8000000000000000 );
3666129203Scognet    ++zExp;
3667129203Scognet roundAndPack:
3668129203Scognet    return
3669129203Scognet        roundAndPackFloatx80(
3670129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3671129203Scognet
3672129203Scognet}
3673129203Scognet
3674129203Scognet/*
3675129203Scognet-------------------------------------------------------------------------------
3676129203ScognetReturns the result of subtracting the absolute values of the extended
3677129203Scognetdouble-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3678129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
3679129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
3680129203ScognetStandard for Binary Floating-Point Arithmetic.
3681129203Scognet-------------------------------------------------------------------------------
3682129203Scognet*/
3683129203Scognetstatic floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3684129203Scognet{
3685129203Scognet    int32 aExp, bExp, zExp;
3686129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3687129203Scognet    int32 expDiff;
3688129203Scognet    floatx80 z;
3689129203Scognet
3690129203Scognet    aSig = extractFloatx80Frac( a );
3691129203Scognet    aExp = extractFloatx80Exp( a );
3692129203Scognet    bSig = extractFloatx80Frac( b );
3693129203Scognet    bExp = extractFloatx80Exp( b );
3694129203Scognet    expDiff = aExp - bExp;
3695129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
3696129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
3697129203Scognet    if ( aExp == 0x7FFF ) {
3698129203Scognet        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3699129203Scognet            return propagateFloatx80NaN( a, b );
3700129203Scognet        }
3701129203Scognet        float_raise( float_flag_invalid );
3702129203Scognet        z.low = floatx80_default_nan_low;
3703129203Scognet        z.high = floatx80_default_nan_high;
3704129203Scognet        return z;
3705129203Scognet    }
3706129203Scognet    if ( aExp == 0 ) {
3707129203Scognet        aExp = 1;
3708129203Scognet        bExp = 1;
3709129203Scognet    }
3710129203Scognet    zSig1 = 0;
3711129203Scognet    if ( bSig < aSig ) goto aBigger;
3712129203Scognet    if ( aSig < bSig ) goto bBigger;
3713129203Scognet    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
3714129203Scognet bExpBigger:
3715129203Scognet    if ( bExp == 0x7FFF ) {
3716129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3717129203Scognet        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3718129203Scognet    }
3719129203Scognet    if ( aExp == 0 ) ++expDiff;
3720129203Scognet    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3721129203Scognet bBigger:
3722129203Scognet    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3723129203Scognet    zExp = bExp;
3724129203Scognet    zSign ^= 1;
3725129203Scognet    goto normalizeRoundAndPack;
3726129203Scognet aExpBigger:
3727129203Scognet    if ( aExp == 0x7FFF ) {
3728129203Scognet        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3729129203Scognet        return a;
3730129203Scognet    }
3731129203Scognet    if ( bExp == 0 ) --expDiff;
3732129203Scognet    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3733129203Scognet aBigger:
3734129203Scognet    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3735129203Scognet    zExp = aExp;
3736129203Scognet normalizeRoundAndPack:
3737129203Scognet    return
3738129203Scognet        normalizeRoundAndPackFloatx80(
3739129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3740129203Scognet
3741129203Scognet}
3742129203Scognet
3743129203Scognet/*
3744129203Scognet-------------------------------------------------------------------------------
3745129203ScognetReturns the result of adding the extended double-precision floating-point
3746129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
3747129203ScognetStandard for Binary Floating-Point Arithmetic.
3748129203Scognet-------------------------------------------------------------------------------
3749129203Scognet*/
3750129203Scognetfloatx80 floatx80_add( floatx80 a, floatx80 b )
3751129203Scognet{
3752129203Scognet    flag aSign, bSign;
3753129203Scognet
3754129203Scognet    aSign = extractFloatx80Sign( a );
3755129203Scognet    bSign = extractFloatx80Sign( b );
3756129203Scognet    if ( aSign == bSign ) {
3757129203Scognet        return addFloatx80Sigs( a, b, aSign );
3758129203Scognet    }
3759129203Scognet    else {
3760129203Scognet        return subFloatx80Sigs( a, b, aSign );
3761129203Scognet    }
3762129203Scognet
3763129203Scognet}
3764129203Scognet
3765129203Scognet/*
3766129203Scognet-------------------------------------------------------------------------------
3767129203ScognetReturns the result of subtracting the extended double-precision floating-
3768129203Scognetpoint values `a' and `b'.  The operation is performed according to the
3769129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
3770129203Scognet-------------------------------------------------------------------------------
3771129203Scognet*/
3772129203Scognetfloatx80 floatx80_sub( floatx80 a, floatx80 b )
3773129203Scognet{
3774129203Scognet    flag aSign, bSign;
3775129203Scognet
3776129203Scognet    aSign = extractFloatx80Sign( a );
3777129203Scognet    bSign = extractFloatx80Sign( b );
3778129203Scognet    if ( aSign == bSign ) {
3779129203Scognet        return subFloatx80Sigs( a, b, aSign );
3780129203Scognet    }
3781129203Scognet    else {
3782129203Scognet        return addFloatx80Sigs( a, b, aSign );
3783129203Scognet    }
3784129203Scognet
3785129203Scognet}
3786129203Scognet
3787129203Scognet/*
3788129203Scognet-------------------------------------------------------------------------------
3789129203ScognetReturns the result of multiplying the extended double-precision floating-
3790129203Scognetpoint values `a' and `b'.  The operation is performed according to the
3791129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
3792129203Scognet-------------------------------------------------------------------------------
3793129203Scognet*/
3794129203Scognetfloatx80 floatx80_mul( floatx80 a, floatx80 b )
3795129203Scognet{
3796129203Scognet    flag aSign, bSign, zSign;
3797129203Scognet    int32 aExp, bExp, zExp;
3798129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3799129203Scognet    floatx80 z;
3800129203Scognet
3801129203Scognet    aSig = extractFloatx80Frac( a );
3802129203Scognet    aExp = extractFloatx80Exp( a );
3803129203Scognet    aSign = extractFloatx80Sign( a );
3804129203Scognet    bSig = extractFloatx80Frac( b );
3805129203Scognet    bExp = extractFloatx80Exp( b );
3806129203Scognet    bSign = extractFloatx80Sign( b );
3807129203Scognet    zSign = aSign ^ bSign;
3808129203Scognet    if ( aExp == 0x7FFF ) {
3809129203Scognet        if (    (bits64) ( aSig<<1 )
3810129203Scognet             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3811129203Scognet            return propagateFloatx80NaN( a, b );
3812129203Scognet        }
3813129203Scognet        if ( ( bExp | bSig ) == 0 ) goto invalid;
3814129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3815129203Scognet    }
3816129203Scognet    if ( bExp == 0x7FFF ) {
3817129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3818129203Scognet        if ( ( aExp | aSig ) == 0 ) {
3819129203Scognet invalid:
3820129203Scognet            float_raise( float_flag_invalid );
3821129203Scognet            z.low = floatx80_default_nan_low;
3822129203Scognet            z.high = floatx80_default_nan_high;
3823129203Scognet            return z;
3824129203Scognet        }
3825129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3826129203Scognet    }
3827129203Scognet    if ( aExp == 0 ) {
3828129203Scognet        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3829129203Scognet        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3830129203Scognet    }
3831129203Scognet    if ( bExp == 0 ) {
3832129203Scognet        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3833129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3834129203Scognet    }
3835129203Scognet    zExp = aExp + bExp - 0x3FFE;
3836129203Scognet    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3837129203Scognet    if ( 0 < (sbits64) zSig0 ) {
3838129203Scognet        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3839129203Scognet        --zExp;
3840129203Scognet    }
3841129203Scognet    return
3842129203Scognet        roundAndPackFloatx80(
3843129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3844129203Scognet
3845129203Scognet}
3846129203Scognet
3847129203Scognet/*
3848129203Scognet-------------------------------------------------------------------------------
3849129203ScognetReturns the result of dividing the extended double-precision floating-point
3850129203Scognetvalue `a' by the corresponding value `b'.  The operation is performed
3851129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3852129203Scognet-------------------------------------------------------------------------------
3853129203Scognet*/
3854129203Scognetfloatx80 floatx80_div( floatx80 a, floatx80 b )
3855129203Scognet{
3856129203Scognet    flag aSign, bSign, zSign;
3857129203Scognet    int32 aExp, bExp, zExp;
3858129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3859129203Scognet    bits64 rem0, rem1, rem2, term0, term1, term2;
3860129203Scognet    floatx80 z;
3861129203Scognet
3862129203Scognet    aSig = extractFloatx80Frac( a );
3863129203Scognet    aExp = extractFloatx80Exp( a );
3864129203Scognet    aSign = extractFloatx80Sign( a );
3865129203Scognet    bSig = extractFloatx80Frac( b );
3866129203Scognet    bExp = extractFloatx80Exp( b );
3867129203Scognet    bSign = extractFloatx80Sign( b );
3868129203Scognet    zSign = aSign ^ bSign;
3869129203Scognet    if ( aExp == 0x7FFF ) {
3870129203Scognet        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3871129203Scognet        if ( bExp == 0x7FFF ) {
3872129203Scognet            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3873129203Scognet            goto invalid;
3874129203Scognet        }
3875129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3876129203Scognet    }
3877129203Scognet    if ( bExp == 0x7FFF ) {
3878129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3879129203Scognet        return packFloatx80( zSign, 0, 0 );
3880129203Scognet    }
3881129203Scognet    if ( bExp == 0 ) {
3882129203Scognet        if ( bSig == 0 ) {
3883129203Scognet            if ( ( aExp | aSig ) == 0 ) {
3884129203Scognet invalid:
3885129203Scognet                float_raise( float_flag_invalid );
3886129203Scognet                z.low = floatx80_default_nan_low;
3887129203Scognet                z.high = floatx80_default_nan_high;
3888129203Scognet                return z;
3889129203Scognet            }
3890129203Scognet            float_raise( float_flag_divbyzero );
3891129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3892129203Scognet        }
3893129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3894129203Scognet    }
3895129203Scognet    if ( aExp == 0 ) {
3896129203Scognet        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3897129203Scognet        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3898129203Scognet    }
3899129203Scognet    zExp = aExp - bExp + 0x3FFE;
3900129203Scognet    rem1 = 0;
3901129203Scognet    if ( bSig <= aSig ) {
3902129203Scognet        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3903129203Scognet        ++zExp;
3904129203Scognet    }
3905129203Scognet    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3906129203Scognet    mul64To128( bSig, zSig0, &term0, &term1 );
3907129203Scognet    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3908129203Scognet    while ( (sbits64) rem0 < 0 ) {
3909129203Scognet        --zSig0;
3910129203Scognet        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3911129203Scognet    }
3912129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3913129203Scognet    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3914129203Scognet        mul64To128( bSig, zSig1, &term1, &term2 );
3915129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3916129203Scognet        while ( (sbits64) rem1 < 0 ) {
3917129203Scognet            --zSig1;
3918129203Scognet            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3919129203Scognet        }
3920129203Scognet        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3921129203Scognet    }
3922129203Scognet    return
3923129203Scognet        roundAndPackFloatx80(
3924129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3925129203Scognet
3926129203Scognet}
3927129203Scognet
3928129203Scognet/*
3929129203Scognet-------------------------------------------------------------------------------
3930129203ScognetReturns the remainder of the extended double-precision floating-point value
3931129203Scognet`a' with respect to the corresponding value `b'.  The operation is performed
3932129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3933129203Scognet-------------------------------------------------------------------------------
3934129203Scognet*/
3935129203Scognetfloatx80 floatx80_rem( floatx80 a, floatx80 b )
3936129203Scognet{
3937129203Scognet    flag aSign, bSign, zSign;
3938129203Scognet    int32 aExp, bExp, expDiff;
3939129203Scognet    bits64 aSig0, aSig1, bSig;
3940129203Scognet    bits64 q, term0, term1, alternateASig0, alternateASig1;
3941129203Scognet    floatx80 z;
3942129203Scognet
3943129203Scognet    aSig0 = extractFloatx80Frac( a );
3944129203Scognet    aExp = extractFloatx80Exp( a );
3945129203Scognet    aSign = extractFloatx80Sign( a );
3946129203Scognet    bSig = extractFloatx80Frac( b );
3947129203Scognet    bExp = extractFloatx80Exp( b );
3948129203Scognet    bSign = extractFloatx80Sign( b );
3949129203Scognet    if ( aExp == 0x7FFF ) {
3950129203Scognet        if (    (bits64) ( aSig0<<1 )
3951129203Scognet             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3952129203Scognet            return propagateFloatx80NaN( a, b );
3953129203Scognet        }
3954129203Scognet        goto invalid;
3955129203Scognet    }
3956129203Scognet    if ( bExp == 0x7FFF ) {
3957129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3958129203Scognet        return a;
3959129203Scognet    }
3960129203Scognet    if ( bExp == 0 ) {
3961129203Scognet        if ( bSig == 0 ) {
3962129203Scognet invalid:
3963129203Scognet            float_raise( float_flag_invalid );
3964129203Scognet            z.low = floatx80_default_nan_low;
3965129203Scognet            z.high = floatx80_default_nan_high;
3966129203Scognet            return z;
3967129203Scognet        }
3968129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3969129203Scognet    }
3970129203Scognet    if ( aExp == 0 ) {
3971129203Scognet        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3972129203Scognet        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3973129203Scognet    }
3974129203Scognet    bSig |= LIT64( 0x8000000000000000 );
3975129203Scognet    zSign = aSign;
3976129203Scognet    expDiff = aExp - bExp;
3977129203Scognet    aSig1 = 0;
3978129203Scognet    if ( expDiff < 0 ) {
3979129203Scognet        if ( expDiff < -1 ) return a;
3980129203Scognet        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3981129203Scognet        expDiff = 0;
3982129203Scognet    }
3983129203Scognet    q = ( bSig <= aSig0 );
3984129203Scognet    if ( q ) aSig0 -= bSig;
3985129203Scognet    expDiff -= 64;
3986129203Scognet    while ( 0 < expDiff ) {
3987129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig );
3988129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
3989129203Scognet        mul64To128( bSig, q, &term0, &term1 );
3990129203Scognet        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3991129203Scognet        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3992129203Scognet        expDiff -= 62;
3993129203Scognet    }
3994129203Scognet    expDiff += 64;
3995129203Scognet    if ( 0 < expDiff ) {
3996129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig );
3997129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
3998129203Scognet        q >>= 64 - expDiff;
3999129203Scognet        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4000129203Scognet        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4001129203Scognet        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4002129203Scognet        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4003129203Scognet            ++q;
4004129203Scognet            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4005129203Scognet        }
4006129203Scognet    }
4007129203Scognet    else {
4008129203Scognet        term1 = 0;
4009129203Scognet        term0 = bSig;
4010129203Scognet    }
4011129203Scognet    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4012129203Scognet    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4013129203Scognet         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4014129203Scognet              && ( q & 1 ) )
4015129203Scognet       ) {
4016129203Scognet        aSig0 = alternateASig0;
4017129203Scognet        aSig1 = alternateASig1;
4018129203Scognet        zSign = ! zSign;
4019129203Scognet    }
4020129203Scognet    return
4021129203Scognet        normalizeRoundAndPackFloatx80(
4022129203Scognet            80, zSign, bExp + expDiff, aSig0, aSig1 );
4023129203Scognet
4024129203Scognet}
4025129203Scognet
4026129203Scognet/*
4027129203Scognet-------------------------------------------------------------------------------
4028129203ScognetReturns the square root of the extended double-precision floating-point
4029129203Scognetvalue `a'.  The operation is performed according to the IEC/IEEE Standard
4030129203Scognetfor Binary Floating-Point Arithmetic.
4031129203Scognet-------------------------------------------------------------------------------
4032129203Scognet*/
4033129203Scognetfloatx80 floatx80_sqrt( floatx80 a )
4034129203Scognet{
4035129203Scognet    flag aSign;
4036129203Scognet    int32 aExp, zExp;
4037129203Scognet    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4038129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4039129203Scognet    floatx80 z;
4040129203Scognet
4041129203Scognet    aSig0 = extractFloatx80Frac( a );
4042129203Scognet    aExp = extractFloatx80Exp( a );
4043129203Scognet    aSign = extractFloatx80Sign( a );
4044129203Scognet    if ( aExp == 0x7FFF ) {
4045129203Scognet        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4046129203Scognet        if ( ! aSign ) return a;
4047129203Scognet        goto invalid;
4048129203Scognet    }
4049129203Scognet    if ( aSign ) {
4050129203Scognet        if ( ( aExp | aSig0 ) == 0 ) return a;
4051129203Scognet invalid:
4052129203Scognet        float_raise( float_flag_invalid );
4053129203Scognet        z.low = floatx80_default_nan_low;
4054129203Scognet        z.high = floatx80_default_nan_high;
4055129203Scognet        return z;
4056129203Scognet    }
4057129203Scognet    if ( aExp == 0 ) {
4058129203Scognet        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4059129203Scognet        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4060129203Scognet    }
4061129203Scognet    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4062129203Scognet    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4063129203Scognet    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4064129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4065129203Scognet    doubleZSig0 = zSig0<<1;
4066129203Scognet    mul64To128( zSig0, zSig0, &term0, &term1 );
4067129203Scognet    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4068129203Scognet    while ( (sbits64) rem0 < 0 ) {
4069129203Scognet        --zSig0;
4070129203Scognet        doubleZSig0 -= 2;
4071129203Scognet        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4072129203Scognet    }
4073129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4074129203Scognet    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4075129203Scognet        if ( zSig1 == 0 ) zSig1 = 1;
4076129203Scognet        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4077129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4078129203Scognet        mul64To128( zSig1, zSig1, &term2, &term3 );
4079129203Scognet        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4080129203Scognet        while ( (sbits64) rem1 < 0 ) {
4081129203Scognet            --zSig1;
4082129203Scognet            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4083129203Scognet            term3 |= 1;
4084129203Scognet            term2 |= doubleZSig0;
4085129203Scognet            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4086129203Scognet        }
4087129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4088129203Scognet    }
4089129203Scognet    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4090129203Scognet    zSig0 |= doubleZSig0;
4091129203Scognet    return
4092129203Scognet        roundAndPackFloatx80(
4093129203Scognet            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4094129203Scognet
4095129203Scognet}
4096129203Scognet
4097129203Scognet/*
4098129203Scognet-------------------------------------------------------------------------------
4099129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
4100129203Scognetequal to the corresponding value `b', and 0 otherwise.  The comparison is
4101129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
4102129203ScognetArithmetic.
4103129203Scognet-------------------------------------------------------------------------------
4104129203Scognet*/
4105129203Scognetflag floatx80_eq( floatx80 a, floatx80 b )
4106129203Scognet{
4107129203Scognet
4108129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4109129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4110129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4111129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4112129203Scognet       ) {
4113129203Scognet        if (    floatx80_is_signaling_nan( a )
4114129203Scognet             || floatx80_is_signaling_nan( b ) ) {
4115129203Scognet            float_raise( float_flag_invalid );
4116129203Scognet        }
4117129203Scognet        return 0;
4118129203Scognet    }
4119129203Scognet    return
4120129203Scognet           ( a.low == b.low )
4121129203Scognet        && (    ( a.high == b.high )
4122129203Scognet             || (    ( a.low == 0 )
4123129203Scognet                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4124129203Scognet           );
4125129203Scognet
4126129203Scognet}
4127129203Scognet
4128129203Scognet/*
4129129203Scognet-------------------------------------------------------------------------------
4130129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
4131129203Scognetless than or equal to the corresponding value `b', and 0 otherwise.  The
4132129203Scognetcomparison is performed according to the IEC/IEEE Standard for Binary
4133129203ScognetFloating-Point Arithmetic.
4134129203Scognet-------------------------------------------------------------------------------
4135129203Scognet*/
4136129203Scognetflag floatx80_le( floatx80 a, floatx80 b )
4137129203Scognet{
4138129203Scognet    flag aSign, bSign;
4139129203Scognet
4140129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4141129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4142129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4143129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4144129203Scognet       ) {
4145129203Scognet        float_raise( float_flag_invalid );
4146129203Scognet        return 0;
4147129203Scognet    }
4148129203Scognet    aSign = extractFloatx80Sign( a );
4149129203Scognet    bSign = extractFloatx80Sign( b );
4150129203Scognet    if ( aSign != bSign ) {
4151129203Scognet        return
4152129203Scognet               aSign
4153129203Scognet            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4154129203Scognet                 == 0 );
4155129203Scognet    }
4156129203Scognet    return
4157129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
4158129203Scognet        : le128( a.high, a.low, b.high, b.low );
4159129203Scognet
4160129203Scognet}
4161129203Scognet
4162129203Scognet/*
4163129203Scognet-------------------------------------------------------------------------------
4164129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
4165129203Scognetless than the corresponding value `b', and 0 otherwise.  The comparison
4166129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4167129203ScognetArithmetic.
4168129203Scognet-------------------------------------------------------------------------------
4169129203Scognet*/
4170129203Scognetflag floatx80_lt( floatx80 a, floatx80 b )
4171129203Scognet{
4172129203Scognet    flag aSign, bSign;
4173129203Scognet
4174129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4175129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4176129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4177129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4178129203Scognet       ) {
4179129203Scognet        float_raise( float_flag_invalid );
4180129203Scognet        return 0;
4181129203Scognet    }
4182129203Scognet    aSign = extractFloatx80Sign( a );
4183129203Scognet    bSign = extractFloatx80Sign( b );
4184129203Scognet    if ( aSign != bSign ) {
4185129203Scognet        return
4186129203Scognet               aSign
4187129203Scognet            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4188129203Scognet                 != 0 );
4189129203Scognet    }
4190129203Scognet    return
4191129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
4192129203Scognet        : lt128( a.high, a.low, b.high, b.low );
4193129203Scognet
4194129203Scognet}
4195129203Scognet
4196129203Scognet/*
4197129203Scognet-------------------------------------------------------------------------------
4198129203ScognetReturns 1 if the extended double-precision floating-point value `a' is equal
4199129203Scognetto the corresponding value `b', and 0 otherwise.  The invalid exception is
4200129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
4201129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4202129203Scognet-------------------------------------------------------------------------------
4203129203Scognet*/
4204129203Scognetflag floatx80_eq_signaling( floatx80 a, floatx80 b )
4205129203Scognet{
4206129203Scognet
4207129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4208129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4209129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4210129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4211129203Scognet       ) {
4212129203Scognet        float_raise( float_flag_invalid );
4213129203Scognet        return 0;
4214129203Scognet    }
4215129203Scognet    return
4216129203Scognet           ( a.low == b.low )
4217129203Scognet        && (    ( a.high == b.high )
4218129203Scognet             || (    ( a.low == 0 )
4219129203Scognet                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4220129203Scognet           );
4221129203Scognet
4222129203Scognet}
4223129203Scognet
4224129203Scognet/*
4225129203Scognet-------------------------------------------------------------------------------
4226129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less
4227129203Scognetthan or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4228129203Scognetdo not cause an exception.  Otherwise, the comparison is performed according
4229129203Scognetto the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4230129203Scognet-------------------------------------------------------------------------------
4231129203Scognet*/
4232129203Scognetflag floatx80_le_quiet( floatx80 a, floatx80 b )
4233129203Scognet{
4234129203Scognet    flag aSign, bSign;
4235129203Scognet
4236129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4237129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4238129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4239129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4240129203Scognet       ) {
4241129203Scognet        if (    floatx80_is_signaling_nan( a )
4242129203Scognet             || floatx80_is_signaling_nan( b ) ) {
4243129203Scognet            float_raise( float_flag_invalid );
4244129203Scognet        }
4245129203Scognet        return 0;
4246129203Scognet    }
4247129203Scognet    aSign = extractFloatx80Sign( a );
4248129203Scognet    bSign = extractFloatx80Sign( b );
4249129203Scognet    if ( aSign != bSign ) {
4250129203Scognet        return
4251129203Scognet               aSign
4252129203Scognet            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4253129203Scognet                 == 0 );
4254129203Scognet    }
4255129203Scognet    return
4256129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
4257129203Scognet        : le128( a.high, a.low, b.high, b.low );
4258129203Scognet
4259129203Scognet}
4260129203Scognet
4261129203Scognet/*
4262129203Scognet-------------------------------------------------------------------------------
4263129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less
4264129203Scognetthan the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4265129203Scognetan exception.  Otherwise, the comparison is performed according to the
4266129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
4267129203Scognet-------------------------------------------------------------------------------
4268129203Scognet*/
4269129203Scognetflag floatx80_lt_quiet( floatx80 a, floatx80 b )
4270129203Scognet{
4271129203Scognet    flag aSign, bSign;
4272129203Scognet
4273129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4274129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4275129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4276129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4277129203Scognet       ) {
4278129203Scognet        if (    floatx80_is_signaling_nan( a )
4279129203Scognet             || floatx80_is_signaling_nan( b ) ) {
4280129203Scognet            float_raise( float_flag_invalid );
4281129203Scognet        }
4282129203Scognet        return 0;
4283129203Scognet    }
4284129203Scognet    aSign = extractFloatx80Sign( a );
4285129203Scognet    bSign = extractFloatx80Sign( b );
4286129203Scognet    if ( aSign != bSign ) {
4287129203Scognet        return
4288129203Scognet               aSign
4289129203Scognet            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4290129203Scognet                 != 0 );
4291129203Scognet    }
4292129203Scognet    return
4293129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
4294129203Scognet        : lt128( a.high, a.low, b.high, b.low );
4295129203Scognet
4296129203Scognet}
4297129203Scognet
4298129203Scognet#endif
4299129203Scognet
4300129203Scognet#ifdef FLOAT128
4301129203Scognet
4302129203Scognet/*
4303129203Scognet-------------------------------------------------------------------------------
4304129203ScognetReturns the result of converting the quadruple-precision floating-point
4305129203Scognetvalue `a' to the 32-bit two's complement integer format.  The conversion
4306129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4307129203ScognetArithmetic---which means in particular that the conversion is rounded
4308129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
4309129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
4310129203Scognetlargest integer with the same sign as `a' is returned.
4311129203Scognet-------------------------------------------------------------------------------
4312129203Scognet*/
4313129203Scognetint32 float128_to_int32( float128 a )
4314129203Scognet{
4315129203Scognet    flag aSign;
4316129203Scognet    int32 aExp, shiftCount;
4317129203Scognet    bits64 aSig0, aSig1;
4318129203Scognet
4319129203Scognet    aSig1 = extractFloat128Frac1( a );
4320129203Scognet    aSig0 = extractFloat128Frac0( a );
4321129203Scognet    aExp = extractFloat128Exp( a );
4322129203Scognet    aSign = extractFloat128Sign( a );
4323129203Scognet    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4324129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4325129203Scognet    aSig0 |= ( aSig1 != 0 );
4326129203Scognet    shiftCount = 0x4028 - aExp;
4327129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4328129203Scognet    return roundAndPackInt32( aSign, aSig0 );
4329129203Scognet
4330129203Scognet}
4331129203Scognet
4332129203Scognet/*
4333129203Scognet-------------------------------------------------------------------------------
4334129203ScognetReturns the result of converting the quadruple-precision floating-point
4335129203Scognetvalue `a' to the 32-bit two's complement integer format.  The conversion
4336129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4337129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
4338129203Scognet`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4339129203Scognetconversion overflows, the largest integer with the same sign as `a' is
4340129203Scognetreturned.
4341129203Scognet-------------------------------------------------------------------------------
4342129203Scognet*/
4343129203Scognetint32 float128_to_int32_round_to_zero( float128 a )
4344129203Scognet{
4345129203Scognet    flag aSign;
4346129203Scognet    int32 aExp, shiftCount;
4347129203Scognet    bits64 aSig0, aSig1, savedASig;
4348129203Scognet    int32 z;
4349129203Scognet
4350129203Scognet    aSig1 = extractFloat128Frac1( a );
4351129203Scognet    aSig0 = extractFloat128Frac0( a );
4352129203Scognet    aExp = extractFloat128Exp( a );
4353129203Scognet    aSign = extractFloat128Sign( a );
4354129203Scognet    aSig0 |= ( aSig1 != 0 );
4355129203Scognet    if ( 0x401E < aExp ) {
4356129203Scognet        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4357129203Scognet        goto invalid;
4358129203Scognet    }
4359129203Scognet    else if ( aExp < 0x3FFF ) {
4360129203Scognet        if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
4361129203Scognet        return 0;
4362129203Scognet    }
4363129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
4364129203Scognet    shiftCount = 0x402F - aExp;
4365129203Scognet    savedASig = aSig0;
4366129203Scognet    aSig0 >>= shiftCount;
4367129203Scognet    z = aSig0;
4368129203Scognet    if ( aSign ) z = - z;
4369129203Scognet    if ( ( z < 0 ) ^ aSign ) {
4370129203Scognet invalid:
4371129203Scognet        float_raise( float_flag_invalid );
4372129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4373129203Scognet    }
4374129203Scognet    if ( ( aSig0<<shiftCount ) != savedASig ) {
4375129203Scognet        float_exception_flags |= float_flag_inexact;
4376129203Scognet    }
4377129203Scognet    return z;
4378129203Scognet
4379129203Scognet}
4380129203Scognet
4381129203Scognet/*
4382129203Scognet-------------------------------------------------------------------------------
4383129203ScognetReturns the result of converting the quadruple-precision floating-point
4384129203Scognetvalue `a' to the 64-bit two's complement integer format.  The conversion
4385129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4386129203ScognetArithmetic---which means in particular that the conversion is rounded
4387129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
4388129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
4389129203Scognetlargest integer with the same sign as `a' is returned.
4390129203Scognet-------------------------------------------------------------------------------
4391129203Scognet*/
4392129203Scognetint64 float128_to_int64( float128 a )
4393129203Scognet{
4394129203Scognet    flag aSign;
4395129203Scognet    int32 aExp, shiftCount;
4396129203Scognet    bits64 aSig0, aSig1;
4397129203Scognet
4398129203Scognet    aSig1 = extractFloat128Frac1( a );
4399129203Scognet    aSig0 = extractFloat128Frac0( a );
4400129203Scognet    aExp = extractFloat128Exp( a );
4401129203Scognet    aSign = extractFloat128Sign( a );
4402129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4403129203Scognet    shiftCount = 0x402F - aExp;
4404129203Scognet    if ( shiftCount <= 0 ) {
4405129203Scognet        if ( 0x403E < aExp ) {
4406129203Scognet            float_raise( float_flag_invalid );
4407129203Scognet            if (    ! aSign
4408129203Scognet                 || (    ( aExp == 0x7FFF )
4409129203Scognet                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4410129203Scognet                    )
4411129203Scognet               ) {
4412129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
4413129203Scognet            }
4414129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
4415129203Scognet        }
4416129203Scognet        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4417129203Scognet    }
4418129203Scognet    else {
4419129203Scognet        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4420129203Scognet    }
4421129203Scognet    return roundAndPackInt64( aSign, aSig0, aSig1 );
4422129203Scognet
4423129203Scognet}
4424129203Scognet
4425129203Scognet/*
4426129203Scognet-------------------------------------------------------------------------------
4427129203ScognetReturns the result of converting the quadruple-precision floating-point
4428129203Scognetvalue `a' to the 64-bit two's complement integer format.  The conversion
4429129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4430129203ScognetArithmetic, except that the conversion is always rounded toward zero.
4431129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4432129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
4433129203Scognetreturned.
4434129203Scognet-------------------------------------------------------------------------------
4435129203Scognet*/
4436129203Scognetint64 float128_to_int64_round_to_zero( float128 a )
4437129203Scognet{
4438129203Scognet    flag aSign;
4439129203Scognet    int32 aExp, shiftCount;
4440129203Scognet    bits64 aSig0, aSig1;
4441129203Scognet    int64 z;
4442129203Scognet
4443129203Scognet    aSig1 = extractFloat128Frac1( a );
4444129203Scognet    aSig0 = extractFloat128Frac0( a );
4445129203Scognet    aExp = extractFloat128Exp( a );
4446129203Scognet    aSign = extractFloat128Sign( a );
4447129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4448129203Scognet    shiftCount = aExp - 0x402F;
4449129203Scognet    if ( 0 < shiftCount ) {
4450129203Scognet        if ( 0x403E <= aExp ) {
4451129203Scognet            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4452129203Scognet            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4453129203Scognet                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4454129203Scognet                if ( aSig1 ) float_exception_flags |= float_flag_inexact;
4455129203Scognet            }
4456129203Scognet            else {
4457129203Scognet                float_raise( float_flag_invalid );
4458129203Scognet                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4459129203Scognet                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4460129203Scognet                }
4461129203Scognet            }
4462129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
4463129203Scognet        }
4464129203Scognet        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4465129203Scognet        if ( (bits64) ( aSig1<<shiftCount ) ) {
4466129203Scognet            float_exception_flags |= float_flag_inexact;
4467129203Scognet        }
4468129203Scognet    }
4469129203Scognet    else {
4470129203Scognet        if ( aExp < 0x3FFF ) {
4471129203Scognet            if ( aExp | aSig0 | aSig1 ) {
4472129203Scognet                float_exception_flags |= float_flag_inexact;
4473129203Scognet            }
4474129203Scognet            return 0;
4475129203Scognet        }
4476129203Scognet        z = aSig0>>( - shiftCount );
4477129203Scognet        if (    aSig1
4478129203Scognet             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4479129203Scognet            float_exception_flags |= float_flag_inexact;
4480129203Scognet        }
4481129203Scognet    }
4482129203Scognet    if ( aSign ) z = - z;
4483129203Scognet    return z;
4484129203Scognet
4485129203Scognet}
4486129203Scognet
4487230363Sdas#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
4488230363Sdas    && defined(SOFTFLOAT_NEED_FIXUNS)
4489129203Scognet/*
4490230363Sdas * just like above - but do not care for overflow of signed results
4491230363Sdas */
4492230363Sdasuint64 float128_to_uint64_round_to_zero( float128 a )
4493230363Sdas{
4494230363Sdas    flag aSign;
4495230363Sdas    int32 aExp, shiftCount;
4496230363Sdas    bits64 aSig0, aSig1;
4497230363Sdas    uint64 z;
4498230363Sdas
4499230363Sdas    aSig1 = extractFloat128Frac1( a );
4500230363Sdas    aSig0 = extractFloat128Frac0( a );
4501230363Sdas    aExp = extractFloat128Exp( a );
4502230363Sdas    aSign = extractFloat128Sign( a );
4503230363Sdas    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4504230363Sdas    shiftCount = aExp - 0x402F;
4505230363Sdas    if ( 0 < shiftCount ) {
4506230363Sdas        if ( 0x403F <= aExp ) {
4507230363Sdas            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4508230363Sdas            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4509230363Sdas                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4510230363Sdas                if ( aSig1 ) float_exception_flags |= float_flag_inexact;
4511230363Sdas            }
4512230363Sdas            else {
4513230363Sdas                float_raise( float_flag_invalid );
4514230363Sdas            }
4515230363Sdas            return LIT64( 0xFFFFFFFFFFFFFFFF );
4516230363Sdas        }
4517230363Sdas        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4518230363Sdas        if ( (bits64) ( aSig1<<shiftCount ) ) {
4519230363Sdas            float_exception_flags |= float_flag_inexact;
4520230363Sdas        }
4521230363Sdas    }
4522230363Sdas    else {
4523230363Sdas        if ( aExp < 0x3FFF ) {
4524230363Sdas            if ( aExp | aSig0 | aSig1 ) {
4525230363Sdas                float_exception_flags |= float_flag_inexact;
4526230363Sdas            }
4527230363Sdas            return 0;
4528230363Sdas        }
4529230363Sdas        z = aSig0>>( - shiftCount );
4530230363Sdas        if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4531230363Sdas            float_exception_flags |= float_flag_inexact;
4532230363Sdas        }
4533230363Sdas    }
4534230363Sdas    if ( aSign ) z = - z;
4535230363Sdas    return z;
4536230363Sdas
4537230363Sdas}
4538230363Sdas#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
4539230363Sdas
4540230363Sdas/*
4541129203Scognet-------------------------------------------------------------------------------
4542129203ScognetReturns the result of converting the quadruple-precision floating-point
4543129203Scognetvalue `a' to the single-precision floating-point format.  The conversion
4544129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4545129203ScognetArithmetic.
4546129203Scognet-------------------------------------------------------------------------------
4547129203Scognet*/
4548129203Scognetfloat32 float128_to_float32( float128 a )
4549129203Scognet{
4550129203Scognet    flag aSign;
4551129203Scognet    int32 aExp;
4552129203Scognet    bits64 aSig0, aSig1;
4553129203Scognet    bits32 zSig;
4554129203Scognet
4555129203Scognet    aSig1 = extractFloat128Frac1( a );
4556129203Scognet    aSig0 = extractFloat128Frac0( a );
4557129203Scognet    aExp = extractFloat128Exp( a );
4558129203Scognet    aSign = extractFloat128Sign( a );
4559129203Scognet    if ( aExp == 0x7FFF ) {
4560129203Scognet        if ( aSig0 | aSig1 ) {
4561129203Scognet            return commonNaNToFloat32( float128ToCommonNaN( a ) );
4562129203Scognet        }
4563129203Scognet        return packFloat32( aSign, 0xFF, 0 );
4564129203Scognet    }
4565129203Scognet    aSig0 |= ( aSig1 != 0 );
4566129203Scognet    shift64RightJamming( aSig0, 18, &aSig0 );
4567129203Scognet    zSig = aSig0;
4568129203Scognet    if ( aExp || zSig ) {
4569129203Scognet        zSig |= 0x40000000;
4570129203Scognet        aExp -= 0x3F81;
4571129203Scognet    }
4572129203Scognet    return roundAndPackFloat32( aSign, aExp, zSig );
4573129203Scognet
4574129203Scognet}
4575129203Scognet
4576129203Scognet/*
4577129203Scognet-------------------------------------------------------------------------------
4578129203ScognetReturns the result of converting the quadruple-precision floating-point
4579129203Scognetvalue `a' to the double-precision floating-point format.  The conversion
4580129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4581129203ScognetArithmetic.
4582129203Scognet-------------------------------------------------------------------------------
4583129203Scognet*/
4584129203Scognetfloat64 float128_to_float64( float128 a )
4585129203Scognet{
4586129203Scognet    flag aSign;
4587129203Scognet    int32 aExp;
4588129203Scognet    bits64 aSig0, aSig1;
4589129203Scognet
4590129203Scognet    aSig1 = extractFloat128Frac1( a );
4591129203Scognet    aSig0 = extractFloat128Frac0( a );
4592129203Scognet    aExp = extractFloat128Exp( a );
4593129203Scognet    aSign = extractFloat128Sign( a );
4594129203Scognet    if ( aExp == 0x7FFF ) {
4595129203Scognet        if ( aSig0 | aSig1 ) {
4596129203Scognet            return commonNaNToFloat64( float128ToCommonNaN( a ) );
4597129203Scognet        }
4598129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
4599129203Scognet    }
4600129203Scognet    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4601129203Scognet    aSig0 |= ( aSig1 != 0 );
4602129203Scognet    if ( aExp || aSig0 ) {
4603129203Scognet        aSig0 |= LIT64( 0x4000000000000000 );
4604129203Scognet        aExp -= 0x3C01;
4605129203Scognet    }
4606129203Scognet    return roundAndPackFloat64( aSign, aExp, aSig0 );
4607129203Scognet
4608129203Scognet}
4609129203Scognet
4610129203Scognet#ifdef FLOATX80
4611129203Scognet
4612129203Scognet/*
4613129203Scognet-------------------------------------------------------------------------------
4614129203ScognetReturns the result of converting the quadruple-precision floating-point
4615129203Scognetvalue `a' to the extended double-precision floating-point format.  The
4616129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
4617129203ScognetFloating-Point Arithmetic.
4618129203Scognet-------------------------------------------------------------------------------
4619129203Scognet*/
4620129203Scognetfloatx80 float128_to_floatx80( float128 a )
4621129203Scognet{
4622129203Scognet    flag aSign;
4623129203Scognet    int32 aExp;
4624129203Scognet    bits64 aSig0, aSig1;
4625129203Scognet
4626129203Scognet    aSig1 = extractFloat128Frac1( a );
4627129203Scognet    aSig0 = extractFloat128Frac0( a );
4628129203Scognet    aExp = extractFloat128Exp( a );
4629129203Scognet    aSign = extractFloat128Sign( a );
4630129203Scognet    if ( aExp == 0x7FFF ) {
4631129203Scognet        if ( aSig0 | aSig1 ) {
4632129203Scognet            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4633129203Scognet        }
4634129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4635129203Scognet    }
4636129203Scognet    if ( aExp == 0 ) {
4637129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4638129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4639129203Scognet    }
4640129203Scognet    else {
4641129203Scognet        aSig0 |= LIT64( 0x0001000000000000 );
4642129203Scognet    }
4643129203Scognet    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4644129203Scognet    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4645129203Scognet
4646129203Scognet}
4647129203Scognet
4648129203Scognet#endif
4649129203Scognet
4650129203Scognet/*
4651129203Scognet-------------------------------------------------------------------------------
4652129203ScognetRounds the quadruple-precision floating-point value `a' to an integer, and
4653129203Scognetreturns the result as a quadruple-precision floating-point value.  The
4654129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
4655129203ScognetFloating-Point Arithmetic.
4656129203Scognet-------------------------------------------------------------------------------
4657129203Scognet*/
4658129203Scognetfloat128 float128_round_to_int( float128 a )
4659129203Scognet{
4660129203Scognet    flag aSign;
4661129203Scognet    int32 aExp;
4662129203Scognet    bits64 lastBitMask, roundBitsMask;
4663129203Scognet    int8 roundingMode;
4664129203Scognet    float128 z;
4665129203Scognet
4666129203Scognet    aExp = extractFloat128Exp( a );
4667129203Scognet    if ( 0x402F <= aExp ) {
4668129203Scognet        if ( 0x406F <= aExp ) {
4669129203Scognet            if (    ( aExp == 0x7FFF )
4670129203Scognet                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4671129203Scognet               ) {
4672129203Scognet                return propagateFloat128NaN( a, a );
4673129203Scognet            }
4674129203Scognet            return a;
4675129203Scognet        }
4676129203Scognet        lastBitMask = 1;
4677129203Scognet        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4678129203Scognet        roundBitsMask = lastBitMask - 1;
4679129203Scognet        z = a;
4680129203Scognet        roundingMode = float_rounding_mode;
4681129203Scognet        if ( roundingMode == float_round_nearest_even ) {
4682129203Scognet            if ( lastBitMask ) {
4683129203Scognet                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4684129203Scognet                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4685129203Scognet            }
4686129203Scognet            else {
4687129203Scognet                if ( (sbits64) z.low < 0 ) {
4688129203Scognet                    ++z.high;
4689129203Scognet                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4690129203Scognet                }
4691129203Scognet            }
4692129203Scognet        }
4693129203Scognet        else if ( roundingMode != float_round_to_zero ) {
4694129203Scognet            if (   extractFloat128Sign( z )
4695129203Scognet                 ^ ( roundingMode == float_round_up ) ) {
4696129203Scognet                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4697129203Scognet            }
4698129203Scognet        }
4699129203Scognet        z.low &= ~ roundBitsMask;
4700129203Scognet    }
4701129203Scognet    else {
4702129203Scognet        if ( aExp < 0x3FFF ) {
4703129203Scognet            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4704129203Scognet            float_exception_flags |= float_flag_inexact;
4705129203Scognet            aSign = extractFloat128Sign( a );
4706129203Scognet            switch ( float_rounding_mode ) {
4707129203Scognet             case float_round_nearest_even:
4708129203Scognet                if (    ( aExp == 0x3FFE )
4709129203Scognet                     && (   extractFloat128Frac0( a )
4710129203Scognet                          | extractFloat128Frac1( a ) )
4711129203Scognet                   ) {
4712129203Scognet                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4713129203Scognet                }
4714129203Scognet                break;
4715129203Scognet	     case float_round_to_zero:
4716129203Scognet		break;
4717129203Scognet             case float_round_down:
4718129203Scognet                return
4719129203Scognet                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4720129203Scognet                    : packFloat128( 0, 0, 0, 0 );
4721129203Scognet             case float_round_up:
4722129203Scognet                return
4723129203Scognet                      aSign ? packFloat128( 1, 0, 0, 0 )
4724129203Scognet                    : packFloat128( 0, 0x3FFF, 0, 0 );
4725129203Scognet            }
4726129203Scognet            return packFloat128( aSign, 0, 0, 0 );
4727129203Scognet        }
4728129203Scognet        lastBitMask = 1;
4729129203Scognet        lastBitMask <<= 0x402F - aExp;
4730129203Scognet        roundBitsMask = lastBitMask - 1;
4731129203Scognet        z.low = 0;
4732129203Scognet        z.high = a.high;
4733129203Scognet        roundingMode = float_rounding_mode;
4734129203Scognet        if ( roundingMode == float_round_nearest_even ) {
4735129203Scognet            z.high += lastBitMask>>1;
4736129203Scognet            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4737129203Scognet                z.high &= ~ lastBitMask;
4738129203Scognet            }
4739129203Scognet        }
4740129203Scognet        else if ( roundingMode != float_round_to_zero ) {
4741129203Scognet            if (   extractFloat128Sign( z )
4742129203Scognet                 ^ ( roundingMode == float_round_up ) ) {
4743129203Scognet                z.high |= ( a.low != 0 );
4744129203Scognet                z.high += roundBitsMask;
4745129203Scognet            }
4746129203Scognet        }
4747129203Scognet        z.high &= ~ roundBitsMask;
4748129203Scognet    }
4749129203Scognet    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4750129203Scognet        float_exception_flags |= float_flag_inexact;
4751129203Scognet    }
4752129203Scognet    return z;
4753129203Scognet
4754129203Scognet}
4755129203Scognet
4756129203Scognet/*
4757129203Scognet-------------------------------------------------------------------------------
4758129203ScognetReturns the result of adding the absolute values of the quadruple-precision
4759129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4760129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
4761129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
4762129203ScognetFloating-Point Arithmetic.
4763129203Scognet-------------------------------------------------------------------------------
4764129203Scognet*/
4765129203Scognetstatic float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4766129203Scognet{
4767129203Scognet    int32 aExp, bExp, zExp;
4768129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4769129203Scognet    int32 expDiff;
4770129203Scognet
4771129203Scognet    aSig1 = extractFloat128Frac1( a );
4772129203Scognet    aSig0 = extractFloat128Frac0( a );
4773129203Scognet    aExp = extractFloat128Exp( a );
4774129203Scognet    bSig1 = extractFloat128Frac1( b );
4775129203Scognet    bSig0 = extractFloat128Frac0( b );
4776129203Scognet    bExp = extractFloat128Exp( b );
4777129203Scognet    expDiff = aExp - bExp;
4778129203Scognet    if ( 0 < expDiff ) {
4779129203Scognet        if ( aExp == 0x7FFF ) {
4780129203Scognet            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4781129203Scognet            return a;
4782129203Scognet        }
4783129203Scognet        if ( bExp == 0 ) {
4784129203Scognet            --expDiff;
4785129203Scognet        }
4786129203Scognet        else {
4787129203Scognet            bSig0 |= LIT64( 0x0001000000000000 );
4788129203Scognet        }
4789129203Scognet        shift128ExtraRightJamming(
4790129203Scognet            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4791129203Scognet        zExp = aExp;
4792129203Scognet    }
4793129203Scognet    else if ( expDiff < 0 ) {
4794129203Scognet        if ( bExp == 0x7FFF ) {
4795129203Scognet            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4796129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
4797129203Scognet        }
4798129203Scognet        if ( aExp == 0 ) {
4799129203Scognet            ++expDiff;
4800129203Scognet        }
4801129203Scognet        else {
4802129203Scognet            aSig0 |= LIT64( 0x0001000000000000 );
4803129203Scognet        }
4804129203Scognet        shift128ExtraRightJamming(
4805129203Scognet            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4806129203Scognet        zExp = bExp;
4807129203Scognet    }
4808129203Scognet    else {
4809129203Scognet        if ( aExp == 0x7FFF ) {
4810129203Scognet            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4811129203Scognet                return propagateFloat128NaN( a, b );
4812129203Scognet            }
4813129203Scognet            return a;
4814129203Scognet        }
4815129203Scognet        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4816129203Scognet        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4817129203Scognet        zSig2 = 0;
4818129203Scognet        zSig0 |= LIT64( 0x0002000000000000 );
4819129203Scognet        zExp = aExp;
4820129203Scognet        goto shiftRight1;
4821129203Scognet    }
4822129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
4823129203Scognet    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4824129203Scognet    --zExp;
4825129203Scognet    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4826129203Scognet    ++zExp;
4827129203Scognet shiftRight1:
4828129203Scognet    shift128ExtraRightJamming(
4829129203Scognet        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4830129203Scognet roundAndPack:
4831129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4832129203Scognet
4833129203Scognet}
4834129203Scognet
4835129203Scognet/*
4836129203Scognet-------------------------------------------------------------------------------
4837129203ScognetReturns the result of subtracting the absolute values of the quadruple-
4838129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
4839129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
4840129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
4841129203ScognetStandard for Binary Floating-Point Arithmetic.
4842129203Scognet-------------------------------------------------------------------------------
4843129203Scognet*/
4844129203Scognetstatic float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4845129203Scognet{
4846129203Scognet    int32 aExp, bExp, zExp;
4847129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4848129203Scognet    int32 expDiff;
4849129203Scognet    float128 z;
4850129203Scognet
4851129203Scognet    aSig1 = extractFloat128Frac1( a );
4852129203Scognet    aSig0 = extractFloat128Frac0( a );
4853129203Scognet    aExp = extractFloat128Exp( a );
4854129203Scognet    bSig1 = extractFloat128Frac1( b );
4855129203Scognet    bSig0 = extractFloat128Frac0( b );
4856129203Scognet    bExp = extractFloat128Exp( b );
4857129203Scognet    expDiff = aExp - bExp;
4858129203Scognet    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4859129203Scognet    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4860129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
4861129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
4862129203Scognet    if ( aExp == 0x7FFF ) {
4863129203Scognet        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4864129203Scognet            return propagateFloat128NaN( a, b );
4865129203Scognet        }
4866129203Scognet        float_raise( float_flag_invalid );
4867129203Scognet        z.low = float128_default_nan_low;
4868129203Scognet        z.high = float128_default_nan_high;
4869129203Scognet        return z;
4870129203Scognet    }
4871129203Scognet    if ( aExp == 0 ) {
4872129203Scognet        aExp = 1;
4873129203Scognet        bExp = 1;
4874129203Scognet    }
4875129203Scognet    if ( bSig0 < aSig0 ) goto aBigger;
4876129203Scognet    if ( aSig0 < bSig0 ) goto bBigger;
4877129203Scognet    if ( bSig1 < aSig1 ) goto aBigger;
4878129203Scognet    if ( aSig1 < bSig1 ) goto bBigger;
4879129203Scognet    return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
4880129203Scognet bExpBigger:
4881129203Scognet    if ( bExp == 0x7FFF ) {
4882129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4883129203Scognet        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4884129203Scognet    }
4885129203Scognet    if ( aExp == 0 ) {
4886129203Scognet        ++expDiff;
4887129203Scognet    }
4888129203Scognet    else {
4889129203Scognet        aSig0 |= LIT64( 0x4000000000000000 );
4890129203Scognet    }
4891129203Scognet    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4892129203Scognet    bSig0 |= LIT64( 0x4000000000000000 );
4893129203Scognet bBigger:
4894129203Scognet    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4895129203Scognet    zExp = bExp;
4896129203Scognet    zSign ^= 1;
4897129203Scognet    goto normalizeRoundAndPack;
4898129203Scognet aExpBigger:
4899129203Scognet    if ( aExp == 0x7FFF ) {
4900129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4901129203Scognet        return a;
4902129203Scognet    }
4903129203Scognet    if ( bExp == 0 ) {
4904129203Scognet        --expDiff;
4905129203Scognet    }
4906129203Scognet    else {
4907129203Scognet        bSig0 |= LIT64( 0x4000000000000000 );
4908129203Scognet    }
4909129203Scognet    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4910129203Scognet    aSig0 |= LIT64( 0x4000000000000000 );
4911129203Scognet aBigger:
4912129203Scognet    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4913129203Scognet    zExp = aExp;
4914129203Scognet normalizeRoundAndPack:
4915129203Scognet    --zExp;
4916129203Scognet    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4917129203Scognet
4918129203Scognet}
4919129203Scognet
4920129203Scognet/*
4921129203Scognet-------------------------------------------------------------------------------
4922129203ScognetReturns the result of adding the quadruple-precision floating-point values
4923129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4924129203Scognetfor Binary Floating-Point Arithmetic.
4925129203Scognet-------------------------------------------------------------------------------
4926129203Scognet*/
4927129203Scognetfloat128 float128_add( float128 a, float128 b )
4928129203Scognet{
4929129203Scognet    flag aSign, bSign;
4930129203Scognet
4931129203Scognet    aSign = extractFloat128Sign( a );
4932129203Scognet    bSign = extractFloat128Sign( b );
4933129203Scognet    if ( aSign == bSign ) {
4934129203Scognet        return addFloat128Sigs( a, b, aSign );
4935129203Scognet    }
4936129203Scognet    else {
4937129203Scognet        return subFloat128Sigs( a, b, aSign );
4938129203Scognet    }
4939129203Scognet
4940129203Scognet}
4941129203Scognet
4942129203Scognet/*
4943129203Scognet-------------------------------------------------------------------------------
4944129203ScognetReturns the result of subtracting the quadruple-precision floating-point
4945129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
4946129203ScognetStandard for Binary Floating-Point Arithmetic.
4947129203Scognet-------------------------------------------------------------------------------
4948129203Scognet*/
4949129203Scognetfloat128 float128_sub( float128 a, float128 b )
4950129203Scognet{
4951129203Scognet    flag aSign, bSign;
4952129203Scognet
4953129203Scognet    aSign = extractFloat128Sign( a );
4954129203Scognet    bSign = extractFloat128Sign( b );
4955129203Scognet    if ( aSign == bSign ) {
4956129203Scognet        return subFloat128Sigs( a, b, aSign );
4957129203Scognet    }
4958129203Scognet    else {
4959129203Scognet        return addFloat128Sigs( a, b, aSign );
4960129203Scognet    }
4961129203Scognet
4962129203Scognet}
4963129203Scognet
4964129203Scognet/*
4965129203Scognet-------------------------------------------------------------------------------
4966129203ScognetReturns the result of multiplying the quadruple-precision floating-point
4967129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
4968129203ScognetStandard for Binary Floating-Point Arithmetic.
4969129203Scognet-------------------------------------------------------------------------------
4970129203Scognet*/
4971129203Scognetfloat128 float128_mul( float128 a, float128 b )
4972129203Scognet{
4973129203Scognet    flag aSign, bSign, zSign;
4974129203Scognet    int32 aExp, bExp, zExp;
4975129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4976129203Scognet    float128 z;
4977129203Scognet
4978129203Scognet    aSig1 = extractFloat128Frac1( a );
4979129203Scognet    aSig0 = extractFloat128Frac0( a );
4980129203Scognet    aExp = extractFloat128Exp( a );
4981129203Scognet    aSign = extractFloat128Sign( a );
4982129203Scognet    bSig1 = extractFloat128Frac1( b );
4983129203Scognet    bSig0 = extractFloat128Frac0( b );
4984129203Scognet    bExp = extractFloat128Exp( b );
4985129203Scognet    bSign = extractFloat128Sign( b );
4986129203Scognet    zSign = aSign ^ bSign;
4987129203Scognet    if ( aExp == 0x7FFF ) {
4988129203Scognet        if (    ( aSig0 | aSig1 )
4989129203Scognet             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4990129203Scognet            return propagateFloat128NaN( a, b );
4991129203Scognet        }
4992129203Scognet        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4993129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
4994129203Scognet    }
4995129203Scognet    if ( bExp == 0x7FFF ) {
4996129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4997129203Scognet        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4998129203Scognet invalid:
4999129203Scognet            float_raise( float_flag_invalid );
5000129203Scognet            z.low = float128_default_nan_low;
5001129203Scognet            z.high = float128_default_nan_high;
5002129203Scognet            return z;
5003129203Scognet        }
5004129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
5005129203Scognet    }
5006129203Scognet    if ( aExp == 0 ) {
5007129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5008129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5009129203Scognet    }
5010129203Scognet    if ( bExp == 0 ) {
5011129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5012129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5013129203Scognet    }
5014129203Scognet    zExp = aExp + bExp - 0x4000;
5015129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
5016129203Scognet    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5017129203Scognet    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5018129203Scognet    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5019129203Scognet    zSig2 |= ( zSig3 != 0 );
5020129203Scognet    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5021129203Scognet        shift128ExtraRightJamming(
5022129203Scognet            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5023129203Scognet        ++zExp;
5024129203Scognet    }
5025129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5026129203Scognet
5027129203Scognet}
5028129203Scognet
5029129203Scognet/*
5030129203Scognet-------------------------------------------------------------------------------
5031129203ScognetReturns the result of dividing the quadruple-precision floating-point value
5032129203Scognet`a' by the corresponding value `b'.  The operation is performed according to
5033129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5034129203Scognet-------------------------------------------------------------------------------
5035129203Scognet*/
5036129203Scognetfloat128 float128_div( float128 a, float128 b )
5037129203Scognet{
5038129203Scognet    flag aSign, bSign, zSign;
5039129203Scognet    int32 aExp, bExp, zExp;
5040129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5041129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5042129203Scognet    float128 z;
5043129203Scognet
5044129203Scognet    aSig1 = extractFloat128Frac1( a );
5045129203Scognet    aSig0 = extractFloat128Frac0( a );
5046129203Scognet    aExp = extractFloat128Exp( a );
5047129203Scognet    aSign = extractFloat128Sign( a );
5048129203Scognet    bSig1 = extractFloat128Frac1( b );
5049129203Scognet    bSig0 = extractFloat128Frac0( b );
5050129203Scognet    bExp = extractFloat128Exp( b );
5051129203Scognet    bSign = extractFloat128Sign( b );
5052129203Scognet    zSign = aSign ^ bSign;
5053129203Scognet    if ( aExp == 0x7FFF ) {
5054129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
5055129203Scognet        if ( bExp == 0x7FFF ) {
5056129203Scognet            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5057129203Scognet            goto invalid;
5058129203Scognet        }
5059129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
5060129203Scognet    }
5061129203Scognet    if ( bExp == 0x7FFF ) {
5062129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5063129203Scognet        return packFloat128( zSign, 0, 0, 0 );
5064129203Scognet    }
5065129203Scognet    if ( bExp == 0 ) {
5066129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) {
5067129203Scognet            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5068129203Scognet invalid:
5069129203Scognet                float_raise( float_flag_invalid );
5070129203Scognet                z.low = float128_default_nan_low;
5071129203Scognet                z.high = float128_default_nan_high;
5072129203Scognet                return z;
5073129203Scognet            }
5074129203Scognet            float_raise( float_flag_divbyzero );
5075129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
5076129203Scognet        }
5077129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5078129203Scognet    }
5079129203Scognet    if ( aExp == 0 ) {
5080129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5081129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5082129203Scognet    }
5083129203Scognet    zExp = aExp - bExp + 0x3FFD;
5084129203Scognet    shortShift128Left(
5085129203Scognet        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5086129203Scognet    shortShift128Left(
5087129203Scognet        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5088129203Scognet    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5089129203Scognet        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5090129203Scognet        ++zExp;
5091129203Scognet    }
5092129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5093129203Scognet    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5094129203Scognet    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5095129203Scognet    while ( (sbits64) rem0 < 0 ) {
5096129203Scognet        --zSig0;
5097129203Scognet        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5098129203Scognet    }
5099129203Scognet    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5100129203Scognet    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5101129203Scognet        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5102129203Scognet        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5103129203Scognet        while ( (sbits64) rem1 < 0 ) {
5104129203Scognet            --zSig1;
5105129203Scognet            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5106129203Scognet        }
5107129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5108129203Scognet    }
5109129203Scognet    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5110129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5111129203Scognet
5112129203Scognet}
5113129203Scognet
5114129203Scognet/*
5115129203Scognet-------------------------------------------------------------------------------
5116129203ScognetReturns the remainder of the quadruple-precision floating-point value `a'
5117129203Scognetwith respect to the corresponding value `b'.  The operation is performed
5118129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5119129203Scognet-------------------------------------------------------------------------------
5120129203Scognet*/
5121129203Scognetfloat128 float128_rem( float128 a, float128 b )
5122129203Scognet{
5123129203Scognet    flag aSign, bSign, zSign;
5124129203Scognet    int32 aExp, bExp, expDiff;
5125129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5126129203Scognet    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5127129203Scognet    sbits64 sigMean0;
5128129203Scognet    float128 z;
5129129203Scognet
5130129203Scognet    aSig1 = extractFloat128Frac1( a );
5131129203Scognet    aSig0 = extractFloat128Frac0( a );
5132129203Scognet    aExp = extractFloat128Exp( a );
5133129203Scognet    aSign = extractFloat128Sign( a );
5134129203Scognet    bSig1 = extractFloat128Frac1( b );
5135129203Scognet    bSig0 = extractFloat128Frac0( b );
5136129203Scognet    bExp = extractFloat128Exp( b );
5137129203Scognet    bSign = extractFloat128Sign( b );
5138129203Scognet    if ( aExp == 0x7FFF ) {
5139129203Scognet        if (    ( aSig0 | aSig1 )
5140129203Scognet             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5141129203Scognet            return propagateFloat128NaN( a, b );
5142129203Scognet        }
5143129203Scognet        goto invalid;
5144129203Scognet    }
5145129203Scognet    if ( bExp == 0x7FFF ) {
5146129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5147129203Scognet        return a;
5148129203Scognet    }
5149129203Scognet    if ( bExp == 0 ) {
5150129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) {
5151129203Scognet invalid:
5152129203Scognet            float_raise( float_flag_invalid );
5153129203Scognet            z.low = float128_default_nan_low;
5154129203Scognet            z.high = float128_default_nan_high;
5155129203Scognet            return z;
5156129203Scognet        }
5157129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5158129203Scognet    }
5159129203Scognet    if ( aExp == 0 ) {
5160129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5161129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5162129203Scognet    }
5163129203Scognet    expDiff = aExp - bExp;
5164129203Scognet    if ( expDiff < -1 ) return a;
5165129203Scognet    shortShift128Left(
5166129203Scognet        aSig0 | LIT64( 0x0001000000000000 ),
5167129203Scognet        aSig1,
5168129203Scognet        15 - ( expDiff < 0 ),
5169129203Scognet        &aSig0,
5170129203Scognet        &aSig1
5171129203Scognet    );
5172129203Scognet    shortShift128Left(
5173129203Scognet        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5174129203Scognet    q = le128( bSig0, bSig1, aSig0, aSig1 );
5175129203Scognet    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5176129203Scognet    expDiff -= 64;
5177129203Scognet    while ( 0 < expDiff ) {
5178129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5179129203Scognet        q = ( 4 < q ) ? q - 4 : 0;
5180129203Scognet        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5181129203Scognet        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5182129203Scognet        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5183129203Scognet        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5184129203Scognet        expDiff -= 61;
5185129203Scognet    }
5186129203Scognet    if ( -64 < expDiff ) {
5187129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5188129203Scognet        q = ( 4 < q ) ? q - 4 : 0;
5189129203Scognet        q >>= - expDiff;
5190129203Scognet        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5191129203Scognet        expDiff += 52;
5192129203Scognet        if ( expDiff < 0 ) {
5193129203Scognet            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5194129203Scognet        }
5195129203Scognet        else {
5196129203Scognet            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5197129203Scognet        }
5198129203Scognet        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5199129203Scognet        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5200129203Scognet    }
5201129203Scognet    else {
5202129203Scognet        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5203129203Scognet        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5204129203Scognet    }
5205129203Scognet    do {
5206129203Scognet        alternateASig0 = aSig0;
5207129203Scognet        alternateASig1 = aSig1;
5208129203Scognet        ++q;
5209129203Scognet        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5210129203Scognet    } while ( 0 <= (sbits64) aSig0 );
5211129203Scognet    add128(
5212230363Sdas        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
5213129203Scognet    if (    ( sigMean0 < 0 )
5214129203Scognet         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5215129203Scognet        aSig0 = alternateASig0;
5216129203Scognet        aSig1 = alternateASig1;
5217129203Scognet    }
5218129203Scognet    zSign = ( (sbits64) aSig0 < 0 );
5219129203Scognet    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5220129203Scognet    return
5221129203Scognet        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
5222129203Scognet
5223129203Scognet}
5224129203Scognet
5225129203Scognet/*
5226129203Scognet-------------------------------------------------------------------------------
5227129203ScognetReturns the square root of the quadruple-precision floating-point value `a'.
5228129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
5229129203ScognetFloating-Point Arithmetic.
5230129203Scognet-------------------------------------------------------------------------------
5231129203Scognet*/
5232129203Scognetfloat128 float128_sqrt( float128 a )
5233129203Scognet{
5234129203Scognet    flag aSign;
5235129203Scognet    int32 aExp, zExp;
5236129203Scognet    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5237129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5238129203Scognet    float128 z;
5239129203Scognet
5240129203Scognet    aSig1 = extractFloat128Frac1( a );
5241129203Scognet    aSig0 = extractFloat128Frac0( a );
5242129203Scognet    aExp = extractFloat128Exp( a );
5243129203Scognet    aSign = extractFloat128Sign( a );
5244129203Scognet    if ( aExp == 0x7FFF ) {
5245129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
5246129203Scognet        if ( ! aSign ) return a;
5247129203Scognet        goto invalid;
5248129203Scognet    }
5249129203Scognet    if ( aSign ) {
5250129203Scognet        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5251129203Scognet invalid:
5252129203Scognet        float_raise( float_flag_invalid );
5253129203Scognet        z.low = float128_default_nan_low;
5254129203Scognet        z.high = float128_default_nan_high;
5255129203Scognet        return z;
5256129203Scognet    }
5257129203Scognet    if ( aExp == 0 ) {
5258129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5259129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5260129203Scognet    }
5261129203Scognet    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5262129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
5263129203Scognet    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5264129203Scognet    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5265129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5266129203Scognet    doubleZSig0 = zSig0<<1;
5267129203Scognet    mul64To128( zSig0, zSig0, &term0, &term1 );
5268129203Scognet    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5269129203Scognet    while ( (sbits64) rem0 < 0 ) {
5270129203Scognet        --zSig0;
5271129203Scognet        doubleZSig0 -= 2;
5272129203Scognet        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5273129203Scognet    }
5274129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5275129203Scognet    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5276129203Scognet        if ( zSig1 == 0 ) zSig1 = 1;
5277129203Scognet        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5278129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5279129203Scognet        mul64To128( zSig1, zSig1, &term2, &term3 );
5280129203Scognet        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5281129203Scognet        while ( (sbits64) rem1 < 0 ) {
5282129203Scognet            --zSig1;
5283129203Scognet            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5284129203Scognet            term3 |= 1;
5285129203Scognet            term2 |= doubleZSig0;
5286129203Scognet            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5287129203Scognet        }
5288129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5289129203Scognet    }
5290129203Scognet    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5291129203Scognet    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
5292129203Scognet
5293129203Scognet}
5294129203Scognet
5295129203Scognet/*
5296129203Scognet-------------------------------------------------------------------------------
5297129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to
5298129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
5299129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5300129203Scognet-------------------------------------------------------------------------------
5301129203Scognet*/
5302129203Scognetflag float128_eq( float128 a, float128 b )
5303129203Scognet{
5304129203Scognet
5305129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5306129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5307129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5308129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5309129203Scognet       ) {
5310129203Scognet        if (    float128_is_signaling_nan( a )
5311129203Scognet             || float128_is_signaling_nan( b ) ) {
5312129203Scognet            float_raise( float_flag_invalid );
5313129203Scognet        }
5314129203Scognet        return 0;
5315129203Scognet    }
5316129203Scognet    return
5317129203Scognet           ( a.low == b.low )
5318129203Scognet        && (    ( a.high == b.high )
5319129203Scognet             || (    ( a.low == 0 )
5320129203Scognet                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5321129203Scognet           );
5322129203Scognet
5323129203Scognet}
5324129203Scognet
5325129203Scognet/*
5326129203Scognet-------------------------------------------------------------------------------
5327129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5328129203Scognetor equal to the corresponding value `b', and 0 otherwise.  The comparison
5329129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
5330129203ScognetArithmetic.
5331129203Scognet-------------------------------------------------------------------------------
5332129203Scognet*/
5333129203Scognetflag float128_le( float128 a, float128 b )
5334129203Scognet{
5335129203Scognet    flag aSign, bSign;
5336129203Scognet
5337129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5338129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5339129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5340129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5341129203Scognet       ) {
5342129203Scognet        float_raise( float_flag_invalid );
5343129203Scognet        return 0;
5344129203Scognet    }
5345129203Scognet    aSign = extractFloat128Sign( a );
5346129203Scognet    bSign = extractFloat128Sign( b );
5347129203Scognet    if ( aSign != bSign ) {
5348129203Scognet        return
5349129203Scognet               aSign
5350129203Scognet            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5351129203Scognet                 == 0 );
5352129203Scognet    }
5353129203Scognet    return
5354129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
5355129203Scognet        : le128( a.high, a.low, b.high, b.low );
5356129203Scognet
5357129203Scognet}
5358129203Scognet
5359129203Scognet/*
5360129203Scognet-------------------------------------------------------------------------------
5361129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5362129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
5363129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5364129203Scognet-------------------------------------------------------------------------------
5365129203Scognet*/
5366129203Scognetflag float128_lt( float128 a, float128 b )
5367129203Scognet{
5368129203Scognet    flag aSign, bSign;
5369129203Scognet
5370129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5371129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5372129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5373129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5374129203Scognet       ) {
5375129203Scognet        float_raise( float_flag_invalid );
5376129203Scognet        return 0;
5377129203Scognet    }
5378129203Scognet    aSign = extractFloat128Sign( a );
5379129203Scognet    bSign = extractFloat128Sign( b );
5380129203Scognet    if ( aSign != bSign ) {
5381129203Scognet        return
5382129203Scognet               aSign
5383129203Scognet            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5384129203Scognet                 != 0 );
5385129203Scognet    }
5386129203Scognet    return
5387129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
5388129203Scognet        : lt128( a.high, a.low, b.high, b.low );
5389129203Scognet
5390129203Scognet}
5391129203Scognet
5392129203Scognet/*
5393129203Scognet-------------------------------------------------------------------------------
5394129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to
5395129203Scognetthe corresponding value `b', and 0 otherwise.  The invalid exception is
5396129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
5397129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5398129203Scognet-------------------------------------------------------------------------------
5399129203Scognet*/
5400129203Scognetflag float128_eq_signaling( float128 a, float128 b )
5401129203Scognet{
5402129203Scognet
5403129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5404129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5405129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5406129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5407129203Scognet       ) {
5408129203Scognet        float_raise( float_flag_invalid );
5409129203Scognet        return 0;
5410129203Scognet    }
5411129203Scognet    return
5412129203Scognet           ( a.low == b.low )
5413129203Scognet        && (    ( a.high == b.high )
5414129203Scognet             || (    ( a.low == 0 )
5415129203Scognet                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5416129203Scognet           );
5417129203Scognet
5418129203Scognet}
5419129203Scognet
5420129203Scognet/*
5421129203Scognet-------------------------------------------------------------------------------
5422129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5423129203Scognetor equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5424129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
5425129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
5426129203Scognet-------------------------------------------------------------------------------
5427129203Scognet*/
5428129203Scognetflag float128_le_quiet( float128 a, float128 b )
5429129203Scognet{
5430129203Scognet    flag aSign, bSign;
5431129203Scognet
5432129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5433129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5434129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5435129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5436129203Scognet       ) {
5437129203Scognet        if (    float128_is_signaling_nan( a )
5438129203Scognet             || float128_is_signaling_nan( b ) ) {
5439129203Scognet            float_raise( float_flag_invalid );
5440129203Scognet        }
5441129203Scognet        return 0;
5442129203Scognet    }
5443129203Scognet    aSign = extractFloat128Sign( a );
5444129203Scognet    bSign = extractFloat128Sign( b );
5445129203Scognet    if ( aSign != bSign ) {
5446129203Scognet        return
5447129203Scognet               aSign
5448129203Scognet            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5449129203Scognet                 == 0 );
5450129203Scognet    }
5451129203Scognet    return
5452129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
5453129203Scognet        : le128( a.high, a.low, b.high, b.low );
5454129203Scognet
5455129203Scognet}
5456129203Scognet
5457129203Scognet/*
5458129203Scognet-------------------------------------------------------------------------------
5459129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5460129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5461129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
5462129203ScognetStandard for Binary Floating-Point Arithmetic.
5463129203Scognet-------------------------------------------------------------------------------
5464129203Scognet*/
5465129203Scognetflag float128_lt_quiet( float128 a, float128 b )
5466129203Scognet{
5467129203Scognet    flag aSign, bSign;
5468129203Scognet
5469129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5470129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5471129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5472129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5473129203Scognet       ) {
5474129203Scognet        if (    float128_is_signaling_nan( a )
5475129203Scognet             || float128_is_signaling_nan( b ) ) {
5476129203Scognet            float_raise( float_flag_invalid );
5477129203Scognet        }
5478129203Scognet        return 0;
5479129203Scognet    }
5480129203Scognet    aSign = extractFloat128Sign( a );
5481129203Scognet    bSign = extractFloat128Sign( b );
5482129203Scognet    if ( aSign != bSign ) {
5483129203Scognet        return
5484129203Scognet               aSign
5485129203Scognet            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5486129203Scognet                 != 0 );
5487129203Scognet    }
5488129203Scognet    return
5489129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
5490129203Scognet        : lt128( a.high, a.low, b.high, b.low );
5491129203Scognet
5492129203Scognet}
5493129203Scognet
5494129203Scognet#endif
5495129203Scognet
5496129203Scognet
5497129203Scognet#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5498129203Scognet
5499129203Scognet/*
5500129203Scognet * These two routines are not part of the original softfloat distribution.
5501129203Scognet *
5502129203Scognet * They are based on the corresponding conversions to integer but return
5503129203Scognet * unsigned numbers instead since these functions are required by GCC.
5504129203Scognet *
5505129203Scognet * Added by Mark Brinicombe <mark@NetBSD.org>	27/09/97
5506129203Scognet *
5507129203Scognet * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
5508129203Scognet */
5509129203Scognet
5510129203Scognet/*
5511129203Scognet-------------------------------------------------------------------------------
5512129203ScognetReturns the result of converting the double-precision floating-point value
5513129203Scognet`a' to the 32-bit unsigned integer format.  The conversion is
5514129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point
5515129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
5516129203Scognet`a' is a NaN, the largest positive integer is returned.  If the conversion
5517129203Scognetoverflows, the largest integer positive is returned.
5518129203Scognet-------------------------------------------------------------------------------
5519129203Scognet*/
5520129203Scognetuint32 float64_to_uint32_round_to_zero( float64 a )
5521129203Scognet{
5522129203Scognet    flag aSign;
5523129203Scognet    int16 aExp, shiftCount;
5524129203Scognet    bits64 aSig, savedASig;
5525129203Scognet    uint32 z;
5526129203Scognet
5527129203Scognet    aSig = extractFloat64Frac( a );
5528129203Scognet    aExp = extractFloat64Exp( a );
5529129203Scognet    aSign = extractFloat64Sign( a );
5530129203Scognet
5531129203Scognet    if (aSign) {
5532129203Scognet        float_raise( float_flag_invalid );
5533129203Scognet    	return(0);
5534129203Scognet    }
5535129203Scognet
5536129203Scognet    if ( 0x41E < aExp ) {
5537129203Scognet        float_raise( float_flag_invalid );
5538129203Scognet        return 0xffffffff;
5539129203Scognet    }
5540129203Scognet    else if ( aExp < 0x3FF ) {
5541129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
5542129203Scognet        return 0;
5543129203Scognet    }
5544129203Scognet    aSig |= LIT64( 0x0010000000000000 );
5545129203Scognet    shiftCount = 0x433 - aExp;
5546129203Scognet    savedASig = aSig;
5547129203Scognet    aSig >>= shiftCount;
5548129203Scognet    z = aSig;
5549129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
5550129203Scognet        float_exception_flags |= float_flag_inexact;
5551129203Scognet    }
5552129203Scognet    return z;
5553129203Scognet
5554129203Scognet}
5555129203Scognet
5556129203Scognet/*
5557129203Scognet-------------------------------------------------------------------------------
5558129203ScognetReturns the result of converting the single-precision floating-point value
5559129203Scognet`a' to the 32-bit unsigned integer format.  The conversion is
5560129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point
5561129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
5562129203Scognet`a' is a NaN, the largest positive integer is returned.  If the conversion
5563129203Scognetoverflows, the largest positive integer is returned.
5564129203Scognet-------------------------------------------------------------------------------
5565129203Scognet*/
5566129203Scognetuint32 float32_to_uint32_round_to_zero( float32 a )
5567129203Scognet{
5568129203Scognet    flag aSign;
5569129203Scognet    int16 aExp, shiftCount;
5570129203Scognet    bits32 aSig;
5571129203Scognet    uint32 z;
5572129203Scognet
5573129203Scognet    aSig = extractFloat32Frac( a );
5574129203Scognet    aExp = extractFloat32Exp( a );
5575129203Scognet    aSign = extractFloat32Sign( a );
5576129203Scognet    shiftCount = aExp - 0x9E;
5577129203Scognet
5578129203Scognet    if (aSign) {
5579129203Scognet        float_raise( float_flag_invalid );
5580129203Scognet    	return(0);
5581129203Scognet    }
5582129203Scognet    if ( 0 < shiftCount ) {
5583129203Scognet        float_raise( float_flag_invalid );
5584129203Scognet        return 0xFFFFFFFF;
5585129203Scognet    }
5586129203Scognet    else if ( aExp <= 0x7E ) {
5587129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
5588129203Scognet        return 0;
5589129203Scognet    }
5590129203Scognet    aSig = ( aSig | 0x800000 )<<8;
5591129203Scognet    z = aSig>>( - shiftCount );
5592129203Scognet    if ( aSig<<( shiftCount & 31 ) ) {
5593129203Scognet        float_exception_flags |= float_flag_inexact;
5594129203Scognet    }
5595129203Scognet    return z;
5596129203Scognet
5597129203Scognet}
5598129203Scognet
5599129203Scognet#endif
5600