1129203Scognet/* $NetBSD: softfloat.c,v 1.2 2003/07/26 19:24:52 salo Exp $ */
2129203Scognet
3129203Scognet/*
4129203Scognet * This version hacked for use with gcc -msoft-float by bjh21.
5129203Scognet * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6129203Scognet *  itself).
7129203Scognet */
8129203Scognet
9129203Scognet/*
10129203Scognet * Things you may want to define:
11129203Scognet *
12129203Scognet * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13129203Scognet *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14129203Scognet *   properly renamed.
15129203Scognet */
16129203Scognet
17129203Scognet/*
18129203Scognet===============================================================================
19129203Scognet
20129203ScognetThis C source file is part of the SoftFloat IEC/IEEE Floating-point
21129203ScognetArithmetic Package, Release 2a.
22129203Scognet
23129203ScognetWritten by John R. Hauser.  This work was made possible in part by the
24129203ScognetInternational Computer Science Institute, located at Suite 600, 1947 Center
25129203ScognetStreet, Berkeley, California 94704.  Funding was partially provided by the
26129203ScognetNational Science Foundation under grant MIP-9311980.  The original version
27129203Scognetof this code was written as part of a project to build a fixed-point vector
28129203Scognetprocessor in collaboration with the University of California at Berkeley,
29129203Scognetoverseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30129203Scognetis available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31129203Scognetarithmetic/SoftFloat.html'.
32129203Scognet
33129203ScognetTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34129203Scognethas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35129203ScognetTIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36129203ScognetPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37129203ScognetAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38129203Scognet
39129203ScognetDerivative works are acceptable, even for commercial purposes, so long as
40129203Scognet(1) they include prominent notice that the work is derivative, and (2) they
41129203Scognetinclude prominent notice akin to these four paragraphs for those parts of
42129203Scognetthis code that are retained.
43129203Scognet
44129203Scognet===============================================================================
45129203Scognet*/
46129203Scognet
47129203Scognet#include <sys/cdefs.h>
48129203Scognet__FBSDID("$FreeBSD$");
49129203Scognet
50129203Scognet#ifdef SOFTFLOAT_FOR_GCC
51129203Scognet#include "softfloat-for-gcc.h"
52129203Scognet#endif
53129203Scognet
54129203Scognet#include "milieu.h"
55129203Scognet#include "softfloat.h"
56129203Scognet
57129203Scognet/*
58129203Scognet * Conversions between floats as stored in memory and floats as
59129203Scognet * SoftFloat uses them
60129203Scognet */
61129203Scognet#ifndef FLOAT64_DEMANGLE
62129203Scognet#define FLOAT64_DEMANGLE(a)	(a)
63129203Scognet#endif
64129203Scognet#ifndef FLOAT64_MANGLE
65129203Scognet#define FLOAT64_MANGLE(a)	(a)
66129203Scognet#endif
67129203Scognet
68129203Scognet/*
69129203Scognet-------------------------------------------------------------------------------
70129203ScognetFloating-point rounding mode, extended double-precision rounding precision,
71129203Scognetand exception flags.
72129203Scognet-------------------------------------------------------------------------------
73129203Scognet*/
74129203Scognetfp_rnd_t float_rounding_mode = float_round_nearest_even;
75129203Scognetfp_except float_exception_flags = 0;
76129203Scognet#ifdef FLOATX80
77129203Scognetint8 floatx80_rounding_precision = 80;
78129203Scognet#endif
79129203Scognet
80129203Scognet/*
81129203Scognet-------------------------------------------------------------------------------
82129203ScognetPrimitive arithmetic functions, including multi-word arithmetic, and
83129203Scognetdivision and square root approximations.  (Can be specialized to target if
84129203Scognetdesired.)
85129203Scognet-------------------------------------------------------------------------------
86129203Scognet*/
87129203Scognet#include "softfloat-macros"
88129203Scognet
89129203Scognet/*
90129203Scognet-------------------------------------------------------------------------------
91129203ScognetFunctions and definitions to determine:  (1) whether tininess for underflow
92129203Scognetis detected before or after rounding by default, (2) what (if anything)
93129203Scognethappens when exceptions are raised, (3) how signaling NaNs are distinguished
94129203Scognetfrom quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
95129203Scognetare propagated from function inputs to output.  These details are target-
96129203Scognetspecific.
97129203Scognet-------------------------------------------------------------------------------
98129203Scognet*/
99129203Scognet#include "softfloat-specialize"
100129203Scognet
101129203Scognet#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
102129203Scognet/*
103129203Scognet-------------------------------------------------------------------------------
104129203ScognetTakes a 64-bit fixed-point value `absZ' with binary point between bits 6
105129203Scognetand 7, and returns the properly rounded 32-bit integer corresponding to the
106129203Scognetinput.  If `zSign' is 1, the input is negated before being converted to an
107129203Scognetinteger.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
108129203Scognetis simply rounded to an integer, with the inexact exception raised if the
109129203Scognetinput cannot be represented exactly as an integer.  However, if the fixed-
110129203Scognetpoint input is too large, the invalid exception is raised and the largest
111129203Scognetpositive or negative integer is returned.
112129203Scognet-------------------------------------------------------------------------------
113129203Scognet*/
114129203Scognetstatic int32 roundAndPackInt32( flag zSign, bits64 absZ )
115129203Scognet{
116129203Scognet    int8 roundingMode;
117129203Scognet    flag roundNearestEven;
118129203Scognet    int8 roundIncrement, roundBits;
119129203Scognet    int32 z;
120129203Scognet
121129203Scognet    roundingMode = float_rounding_mode;
122129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
123129203Scognet    roundIncrement = 0x40;
124129203Scognet    if ( ! roundNearestEven ) {
125129203Scognet        if ( roundingMode == float_round_to_zero ) {
126129203Scognet            roundIncrement = 0;
127129203Scognet        }
128129203Scognet        else {
129129203Scognet            roundIncrement = 0x7F;
130129203Scognet            if ( zSign ) {
131129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
132129203Scognet            }
133129203Scognet            else {
134129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
135129203Scognet            }
136129203Scognet        }
137129203Scognet    }
138129203Scognet    roundBits = absZ & 0x7F;
139129203Scognet    absZ = ( absZ + roundIncrement )>>7;
140129203Scognet    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
141129203Scognet    z = absZ;
142129203Scognet    if ( zSign ) z = - z;
143129203Scognet    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
144129203Scognet        float_raise( float_flag_invalid );
145129203Scognet        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
146129203Scognet    }
147129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
148129203Scognet    return z;
149129203Scognet
150129203Scognet}
151129203Scognet
152129203Scognet/*
153129203Scognet-------------------------------------------------------------------------------
154129203ScognetTakes the 128-bit fixed-point value formed by concatenating `absZ0' and
155129203Scognet`absZ1', with binary point between bits 63 and 64 (between the input words),
156129203Scognetand returns the properly rounded 64-bit integer corresponding to the input.
157129203ScognetIf `zSign' is 1, the input is negated before being converted to an integer.
158129203ScognetOrdinarily, the fixed-point input is simply rounded to an integer, with
159129203Scognetthe inexact exception raised if the input cannot be represented exactly as
160129203Scognetan integer.  However, if the fixed-point input is too large, the invalid
161129203Scognetexception is raised and the largest positive or negative integer is
162129203Scognetreturned.
163129203Scognet-------------------------------------------------------------------------------
164129203Scognet*/
165129203Scognetstatic int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
166129203Scognet{
167129203Scognet    int8 roundingMode;
168129203Scognet    flag roundNearestEven, increment;
169129203Scognet    int64 z;
170129203Scognet
171129203Scognet    roundingMode = float_rounding_mode;
172129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
173129203Scognet    increment = ( (sbits64) absZ1 < 0 );
174129203Scognet    if ( ! roundNearestEven ) {
175129203Scognet        if ( roundingMode == float_round_to_zero ) {
176129203Scognet            increment = 0;
177129203Scognet        }
178129203Scognet        else {
179129203Scognet            if ( zSign ) {
180129203Scognet                increment = ( roundingMode == float_round_down ) && absZ1;
181129203Scognet            }
182129203Scognet            else {
183129203Scognet                increment = ( roundingMode == float_round_up ) && absZ1;
184129203Scognet            }
185129203Scognet        }
186129203Scognet    }
187129203Scognet    if ( increment ) {
188129203Scognet        ++absZ0;
189129203Scognet        if ( absZ0 == 0 ) goto overflow;
190129203Scognet        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
191129203Scognet    }
192129203Scognet    z = absZ0;
193129203Scognet    if ( zSign ) z = - z;
194129203Scognet    if ( z && ( ( z < 0 ) ^ zSign ) ) {
195129203Scognet overflow:
196129203Scognet        float_raise( float_flag_invalid );
197129203Scognet        return
198129203Scognet              zSign ? (sbits64) LIT64( 0x8000000000000000 )
199129203Scognet            : LIT64( 0x7FFFFFFFFFFFFFFF );
200129203Scognet    }
201129203Scognet    if ( absZ1 ) float_exception_flags |= float_flag_inexact;
202129203Scognet    return z;
203129203Scognet
204129203Scognet}
205129203Scognet#endif
206129203Scognet
207129203Scognet/*
208129203Scognet-------------------------------------------------------------------------------
209129203ScognetReturns the fraction bits of the single-precision floating-point value `a'.
210129203Scognet-------------------------------------------------------------------------------
211129203Scognet*/
212129203ScognetINLINE bits32 extractFloat32Frac( float32 a )
213129203Scognet{
214129203Scognet
215129203Scognet    return a & 0x007FFFFF;
216129203Scognet
217129203Scognet}
218129203Scognet
219129203Scognet/*
220129203Scognet-------------------------------------------------------------------------------
221129203ScognetReturns the exponent bits of the single-precision floating-point value `a'.
222129203Scognet-------------------------------------------------------------------------------
223129203Scognet*/
224129203ScognetINLINE int16 extractFloat32Exp( float32 a )
225129203Scognet{
226129203Scognet
227129203Scognet    return ( a>>23 ) & 0xFF;
228129203Scognet
229129203Scognet}
230129203Scognet
231129203Scognet/*
232129203Scognet-------------------------------------------------------------------------------
233129203ScognetReturns the sign bit of the single-precision floating-point value `a'.
234129203Scognet-------------------------------------------------------------------------------
235129203Scognet*/
236129203ScognetINLINE flag extractFloat32Sign( float32 a )
237129203Scognet{
238129203Scognet
239129203Scognet    return a>>31;
240129203Scognet
241129203Scognet}
242129203Scognet
243129203Scognet/*
244129203Scognet-------------------------------------------------------------------------------
245129203ScognetNormalizes the subnormal single-precision floating-point value represented
246129203Scognetby the denormalized significand `aSig'.  The normalized exponent and
247129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and
248129203Scognet`zSigPtr', respectively.
249129203Scognet-------------------------------------------------------------------------------
250129203Scognet*/
251129203Scognetstatic void
252129203Scognet normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
253129203Scognet{
254129203Scognet    int8 shiftCount;
255129203Scognet
256129203Scognet    shiftCount = countLeadingZeros32( aSig ) - 8;
257129203Scognet    *zSigPtr = aSig<<shiftCount;
258129203Scognet    *zExpPtr = 1 - shiftCount;
259129203Scognet
260129203Scognet}
261129203Scognet
262129203Scognet/*
263129203Scognet-------------------------------------------------------------------------------
264129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
265129203Scognetsingle-precision floating-point value, returning the result.  After being
266129203Scognetshifted into the proper positions, the three fields are simply added
267129203Scognettogether to form the result.  This means that any integer portion of `zSig'
268129203Scognetwill be added into the exponent.  Since a properly normalized significand
269129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less
270129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized
271129203Scognetsignificand.
272129203Scognet-------------------------------------------------------------------------------
273129203Scognet*/
274129203ScognetINLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
275129203Scognet{
276129203Scognet
277129203Scognet    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
278129203Scognet
279129203Scognet}
280129203Scognet
281129203Scognet/*
282129203Scognet-------------------------------------------------------------------------------
283129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
284129203Scognetand significand `zSig', and returns the proper single-precision floating-
285129203Scognetpoint value corresponding to the abstract input.  Ordinarily, the abstract
286129203Scognetvalue is simply rounded and packed into the single-precision format, with
287129203Scognetthe inexact exception raised if the abstract input cannot be represented
288129203Scognetexactly.  However, if the abstract value is too large, the overflow and
289129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
290129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
291129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
292129203Scognetthe abstract input cannot be represented exactly as a subnormal single-
293129203Scognetprecision floating-point number.
294129203Scognet    The input significand `zSig' has its binary point between bits 30
295129203Scognetand 29, which is 7 bits to the left of the usual location.  This shifted
296129203Scognetsignificand must be normalized or smaller.  If `zSig' is not normalized,
297129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number,
298129203Scognetand it must not require rounding.  In the usual case that `zSig' is
299129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
300129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for
301129203ScognetBinary Floating-Point Arithmetic.
302129203Scognet-------------------------------------------------------------------------------
303129203Scognet*/
304129203Scognetstatic float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
305129203Scognet{
306129203Scognet    int8 roundingMode;
307129203Scognet    flag roundNearestEven;
308129203Scognet    int8 roundIncrement, roundBits;
309129203Scognet    flag isTiny;
310129203Scognet
311129203Scognet    roundingMode = float_rounding_mode;
312129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
313129203Scognet    roundIncrement = 0x40;
314129203Scognet    if ( ! roundNearestEven ) {
315129203Scognet        if ( roundingMode == float_round_to_zero ) {
316129203Scognet            roundIncrement = 0;
317129203Scognet        }
318129203Scognet        else {
319129203Scognet            roundIncrement = 0x7F;
320129203Scognet            if ( zSign ) {
321129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
322129203Scognet            }
323129203Scognet            else {
324129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
325129203Scognet            }
326129203Scognet        }
327129203Scognet    }
328129203Scognet    roundBits = zSig & 0x7F;
329129203Scognet    if ( 0xFD <= (bits16) zExp ) {
330129203Scognet        if (    ( 0xFD < zExp )
331129203Scognet             || (    ( zExp == 0xFD )
332129203Scognet                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
333129203Scognet           ) {
334129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
335129203Scognet            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
336129203Scognet        }
337129203Scognet        if ( zExp < 0 ) {
338129203Scognet            isTiny =
339129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
340129203Scognet                || ( zExp < -1 )
341129203Scognet                || ( zSig + roundIncrement < 0x80000000 );
342129203Scognet            shift32RightJamming( zSig, - zExp, &zSig );
343129203Scognet            zExp = 0;
344129203Scognet            roundBits = zSig & 0x7F;
345129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
346129203Scognet        }
347129203Scognet    }
348129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
349129203Scognet    zSig = ( zSig + roundIncrement )>>7;
350129203Scognet    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
351129203Scognet    if ( zSig == 0 ) zExp = 0;
352129203Scognet    return packFloat32( zSign, zExp, zSig );
353129203Scognet
354129203Scognet}
355129203Scognet
356129203Scognet/*
357129203Scognet-------------------------------------------------------------------------------
358129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
359129203Scognetand significand `zSig', and returns the proper single-precision floating-
360129203Scognetpoint value corresponding to the abstract input.  This routine is just like
361129203Scognet`roundAndPackFloat32' except that `zSig' does not have to be normalized.
362129203ScognetBit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
363129203Scognetfloating-point exponent.
364129203Scognet-------------------------------------------------------------------------------
365129203Scognet*/
366129203Scognetstatic float32
367129203Scognet normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
368129203Scognet{
369129203Scognet    int8 shiftCount;
370129203Scognet
371129203Scognet    shiftCount = countLeadingZeros32( zSig ) - 1;
372129203Scognet    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
373129203Scognet
374129203Scognet}
375129203Scognet
376129203Scognet/*
377129203Scognet-------------------------------------------------------------------------------
378129203ScognetReturns the fraction bits of the double-precision floating-point value `a'.
379129203Scognet-------------------------------------------------------------------------------
380129203Scognet*/
381129203ScognetINLINE bits64 extractFloat64Frac( float64 a )
382129203Scognet{
383129203Scognet
384129203Scognet    return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
385129203Scognet
386129203Scognet}
387129203Scognet
388129203Scognet/*
389129203Scognet-------------------------------------------------------------------------------
390129203ScognetReturns the exponent bits of the double-precision floating-point value `a'.
391129203Scognet-------------------------------------------------------------------------------
392129203Scognet*/
393129203ScognetINLINE int16 extractFloat64Exp( float64 a )
394129203Scognet{
395129203Scognet
396129203Scognet    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
397129203Scognet
398129203Scognet}
399129203Scognet
400129203Scognet/*
401129203Scognet-------------------------------------------------------------------------------
402129203ScognetReturns the sign bit of the double-precision floating-point value `a'.
403129203Scognet-------------------------------------------------------------------------------
404129203Scognet*/
405129203ScognetINLINE flag extractFloat64Sign( float64 a )
406129203Scognet{
407129203Scognet
408129203Scognet    return FLOAT64_DEMANGLE(a)>>63;
409129203Scognet
410129203Scognet}
411129203Scognet
412129203Scognet/*
413129203Scognet-------------------------------------------------------------------------------
414129203ScognetNormalizes the subnormal double-precision floating-point value represented
415129203Scognetby the denormalized significand `aSig'.  The normalized exponent and
416129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and
417129203Scognet`zSigPtr', respectively.
418129203Scognet-------------------------------------------------------------------------------
419129203Scognet*/
420129203Scognetstatic void
421129203Scognet normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
422129203Scognet{
423129203Scognet    int8 shiftCount;
424129203Scognet
425129203Scognet    shiftCount = countLeadingZeros64( aSig ) - 11;
426129203Scognet    *zSigPtr = aSig<<shiftCount;
427129203Scognet    *zExpPtr = 1 - shiftCount;
428129203Scognet
429129203Scognet}
430129203Scognet
431129203Scognet/*
432129203Scognet-------------------------------------------------------------------------------
433129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
434129203Scognetdouble-precision floating-point value, returning the result.  After being
435129203Scognetshifted into the proper positions, the three fields are simply added
436129203Scognettogether to form the result.  This means that any integer portion of `zSig'
437129203Scognetwill be added into the exponent.  Since a properly normalized significand
438129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less
439129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized
440129203Scognetsignificand.
441129203Scognet-------------------------------------------------------------------------------
442129203Scognet*/
443129203ScognetINLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
444129203Scognet{
445129203Scognet
446129203Scognet    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
447129203Scognet			   ( ( (bits64) zExp )<<52 ) + zSig );
448129203Scognet
449129203Scognet}
450129203Scognet
451129203Scognet/*
452129203Scognet-------------------------------------------------------------------------------
453129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
454129203Scognetand significand `zSig', and returns the proper double-precision floating-
455129203Scognetpoint value corresponding to the abstract input.  Ordinarily, the abstract
456129203Scognetvalue is simply rounded and packed into the double-precision format, with
457129203Scognetthe inexact exception raised if the abstract input cannot be represented
458129203Scognetexactly.  However, if the abstract value is too large, the overflow and
459129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
460129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
461129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
462129203Scognetthe abstract input cannot be represented exactly as a subnormal double-
463129203Scognetprecision floating-point number.
464129203Scognet    The input significand `zSig' has its binary point between bits 62
465129203Scognetand 61, which is 10 bits to the left of the usual location.  This shifted
466129203Scognetsignificand must be normalized or smaller.  If `zSig' is not normalized,
467129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number,
468129203Scognetand it must not require rounding.  In the usual case that `zSig' is
469129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
470129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for
471129203ScognetBinary Floating-Point Arithmetic.
472129203Scognet-------------------------------------------------------------------------------
473129203Scognet*/
474129203Scognetstatic float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
475129203Scognet{
476129203Scognet    int8 roundingMode;
477129203Scognet    flag roundNearestEven;
478129203Scognet    int16 roundIncrement, roundBits;
479129203Scognet    flag isTiny;
480129203Scognet
481129203Scognet    roundingMode = float_rounding_mode;
482129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
483129203Scognet    roundIncrement = 0x200;
484129203Scognet    if ( ! roundNearestEven ) {
485129203Scognet        if ( roundingMode == float_round_to_zero ) {
486129203Scognet            roundIncrement = 0;
487129203Scognet        }
488129203Scognet        else {
489129203Scognet            roundIncrement = 0x3FF;
490129203Scognet            if ( zSign ) {
491129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
492129203Scognet            }
493129203Scognet            else {
494129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
495129203Scognet            }
496129203Scognet        }
497129203Scognet    }
498129203Scognet    roundBits = zSig & 0x3FF;
499129203Scognet    if ( 0x7FD <= (bits16) zExp ) {
500129203Scognet        if (    ( 0x7FD < zExp )
501129203Scognet             || (    ( zExp == 0x7FD )
502129203Scognet                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
503129203Scognet           ) {
504129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
505129203Scognet            return FLOAT64_MANGLE(
506129203Scognet		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
507129203Scognet		( roundIncrement == 0 ));
508129203Scognet        }
509129203Scognet        if ( zExp < 0 ) {
510129203Scognet            isTiny =
511129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
512129203Scognet                || ( zExp < -1 )
513129203Scognet                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
514129203Scognet            shift64RightJamming( zSig, - zExp, &zSig );
515129203Scognet            zExp = 0;
516129203Scognet            roundBits = zSig & 0x3FF;
517129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
518129203Scognet        }
519129203Scognet    }
520129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
521129203Scognet    zSig = ( zSig + roundIncrement )>>10;
522129203Scognet    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
523129203Scognet    if ( zSig == 0 ) zExp = 0;
524129203Scognet    return packFloat64( zSign, zExp, zSig );
525129203Scognet
526129203Scognet}
527129203Scognet
528129203Scognet/*
529129203Scognet-------------------------------------------------------------------------------
530129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
531129203Scognetand significand `zSig', and returns the proper double-precision floating-
532129203Scognetpoint value corresponding to the abstract input.  This routine is just like
533129203Scognet`roundAndPackFloat64' except that `zSig' does not have to be normalized.
534129203ScognetBit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
535129203Scognetfloating-point exponent.
536129203Scognet-------------------------------------------------------------------------------
537129203Scognet*/
538129203Scognetstatic float64
539129203Scognet normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
540129203Scognet{
541129203Scognet    int8 shiftCount;
542129203Scognet
543129203Scognet    shiftCount = countLeadingZeros64( zSig ) - 1;
544129203Scognet    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
545129203Scognet
546129203Scognet}
547129203Scognet
548129203Scognet#ifdef FLOATX80
549129203Scognet
550129203Scognet/*
551129203Scognet-------------------------------------------------------------------------------
552129203ScognetReturns the fraction bits of the extended double-precision floating-point
553129203Scognetvalue `a'.
554129203Scognet-------------------------------------------------------------------------------
555129203Scognet*/
556129203ScognetINLINE bits64 extractFloatx80Frac( floatx80 a )
557129203Scognet{
558129203Scognet
559129203Scognet    return a.low;
560129203Scognet
561129203Scognet}
562129203Scognet
563129203Scognet/*
564129203Scognet-------------------------------------------------------------------------------
565129203ScognetReturns the exponent bits of the extended double-precision floating-point
566129203Scognetvalue `a'.
567129203Scognet-------------------------------------------------------------------------------
568129203Scognet*/
569129203ScognetINLINE int32 extractFloatx80Exp( floatx80 a )
570129203Scognet{
571129203Scognet
572129203Scognet    return a.high & 0x7FFF;
573129203Scognet
574129203Scognet}
575129203Scognet
576129203Scognet/*
577129203Scognet-------------------------------------------------------------------------------
578129203ScognetReturns the sign bit of the extended double-precision floating-point value
579129203Scognet`a'.
580129203Scognet-------------------------------------------------------------------------------
581129203Scognet*/
582129203ScognetINLINE flag extractFloatx80Sign( floatx80 a )
583129203Scognet{
584129203Scognet
585129203Scognet    return a.high>>15;
586129203Scognet
587129203Scognet}
588129203Scognet
589129203Scognet/*
590129203Scognet-------------------------------------------------------------------------------
591129203ScognetNormalizes the subnormal extended double-precision floating-point value
592129203Scognetrepresented by the denormalized significand `aSig'.  The normalized exponent
593129203Scognetand significand are stored at the locations pointed to by `zExpPtr' and
594129203Scognet`zSigPtr', respectively.
595129203Scognet-------------------------------------------------------------------------------
596129203Scognet*/
597129203Scognetstatic void
598129203Scognet normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
599129203Scognet{
600129203Scognet    int8 shiftCount;
601129203Scognet
602129203Scognet    shiftCount = countLeadingZeros64( aSig );
603129203Scognet    *zSigPtr = aSig<<shiftCount;
604129203Scognet    *zExpPtr = 1 - shiftCount;
605129203Scognet
606129203Scognet}
607129203Scognet
608129203Scognet/*
609129203Scognet-------------------------------------------------------------------------------
610129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into an
611129203Scognetextended double-precision floating-point value, returning the result.
612129203Scognet-------------------------------------------------------------------------------
613129203Scognet*/
614129203ScognetINLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
615129203Scognet{
616129203Scognet    floatx80 z;
617129203Scognet
618129203Scognet    z.low = zSig;
619129203Scognet    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
620129203Scognet    return z;
621129203Scognet
622129203Scognet}
623129203Scognet
624129203Scognet/*
625129203Scognet-------------------------------------------------------------------------------
626129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
627129203Scognetand extended significand formed by the concatenation of `zSig0' and `zSig1',
628129203Scognetand returns the proper extended double-precision floating-point value
629129203Scognetcorresponding to the abstract input.  Ordinarily, the abstract value is
630129203Scognetrounded and packed into the extended double-precision format, with the
631129203Scognetinexact exception raised if the abstract input cannot be represented
632129203Scognetexactly.  However, if the abstract value is too large, the overflow and
633129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
634129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
635129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
636129203Scognetthe abstract input cannot be represented exactly as a subnormal extended
637129203Scognetdouble-precision floating-point number.
638129203Scognet    If `roundingPrecision' is 32 or 64, the result is rounded to the same
639129203Scognetnumber of bits as single or double precision, respectively.  Otherwise, the
640129203Scognetresult is rounded to the full precision of the extended double-precision
641129203Scognetformat.
642129203Scognet    The input significand must be normalized or smaller.  If the input
643129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result
644129203Scognetreturned is a subnormal number, and it must not require rounding.  The
645129203Scognethandling of underflow and overflow follows the IEC/IEEE Standard for Binary
646129203ScognetFloating-Point Arithmetic.
647129203Scognet-------------------------------------------------------------------------------
648129203Scognet*/
649129203Scognetstatic floatx80
650129203Scognet roundAndPackFloatx80(
651129203Scognet     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
652129203Scognet )
653129203Scognet{
654129203Scognet    int8 roundingMode;
655129203Scognet    flag roundNearestEven, increment, isTiny;
656129203Scognet    int64 roundIncrement, roundMask, roundBits;
657129203Scognet
658129203Scognet    roundingMode = float_rounding_mode;
659129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
660129203Scognet    if ( roundingPrecision == 80 ) goto precision80;
661129203Scognet    if ( roundingPrecision == 64 ) {
662129203Scognet        roundIncrement = LIT64( 0x0000000000000400 );
663129203Scognet        roundMask = LIT64( 0x00000000000007FF );
664129203Scognet    }
665129203Scognet    else if ( roundingPrecision == 32 ) {
666129203Scognet        roundIncrement = LIT64( 0x0000008000000000 );
667129203Scognet        roundMask = LIT64( 0x000000FFFFFFFFFF );
668129203Scognet    }
669129203Scognet    else {
670129203Scognet        goto precision80;
671129203Scognet    }
672129203Scognet    zSig0 |= ( zSig1 != 0 );
673129203Scognet    if ( ! roundNearestEven ) {
674129203Scognet        if ( roundingMode == float_round_to_zero ) {
675129203Scognet            roundIncrement = 0;
676129203Scognet        }
677129203Scognet        else {
678129203Scognet            roundIncrement = roundMask;
679129203Scognet            if ( zSign ) {
680129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
681129203Scognet            }
682129203Scognet            else {
683129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
684129203Scognet            }
685129203Scognet        }
686129203Scognet    }
687129203Scognet    roundBits = zSig0 & roundMask;
688129203Scognet    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
689129203Scognet        if (    ( 0x7FFE < zExp )
690129203Scognet             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
691129203Scognet           ) {
692129203Scognet            goto overflow;
693129203Scognet        }
694129203Scognet        if ( zExp <= 0 ) {
695129203Scognet            isTiny =
696129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
697129203Scognet                || ( zExp < 0 )
698129203Scognet                || ( zSig0 <= zSig0 + roundIncrement );
699129203Scognet            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
700129203Scognet            zExp = 0;
701129203Scognet            roundBits = zSig0 & roundMask;
702129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
703129203Scognet            if ( roundBits ) float_exception_flags |= float_flag_inexact;
704129203Scognet            zSig0 += roundIncrement;
705129203Scognet            if ( (sbits64) zSig0 < 0 ) zExp = 1;
706129203Scognet            roundIncrement = roundMask + 1;
707129203Scognet            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
708129203Scognet                roundMask |= roundIncrement;
709129203Scognet            }
710129203Scognet            zSig0 &= ~ roundMask;
711129203Scognet            return packFloatx80( zSign, zExp, zSig0 );
712129203Scognet        }
713129203Scognet    }
714129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
715129203Scognet    zSig0 += roundIncrement;
716129203Scognet    if ( zSig0 < roundIncrement ) {
717129203Scognet        ++zExp;
718129203Scognet        zSig0 = LIT64( 0x8000000000000000 );
719129203Scognet    }
720129203Scognet    roundIncrement = roundMask + 1;
721129203Scognet    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
722129203Scognet        roundMask |= roundIncrement;
723129203Scognet    }
724129203Scognet    zSig0 &= ~ roundMask;
725129203Scognet    if ( zSig0 == 0 ) zExp = 0;
726129203Scognet    return packFloatx80( zSign, zExp, zSig0 );
727129203Scognet precision80:
728129203Scognet    increment = ( (sbits64) zSig1 < 0 );
729129203Scognet    if ( ! roundNearestEven ) {
730129203Scognet        if ( roundingMode == float_round_to_zero ) {
731129203Scognet            increment = 0;
732129203Scognet        }
733129203Scognet        else {
734129203Scognet            if ( zSign ) {
735129203Scognet                increment = ( roundingMode == float_round_down ) && zSig1;
736129203Scognet            }
737129203Scognet            else {
738129203Scognet                increment = ( roundingMode == float_round_up ) && zSig1;
739129203Scognet            }
740129203Scognet        }
741129203Scognet    }
742129203Scognet    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
743129203Scognet        if (    ( 0x7FFE < zExp )
744129203Scognet             || (    ( zExp == 0x7FFE )
745129203Scognet                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
746129203Scognet                  && increment
747129203Scognet                )
748129203Scognet           ) {
749129203Scognet            roundMask = 0;
750129203Scognet overflow:
751129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
752129203Scognet            if (    ( roundingMode == float_round_to_zero )
753129203Scognet                 || ( zSign && ( roundingMode == float_round_up ) )
754129203Scognet                 || ( ! zSign && ( roundingMode == float_round_down ) )
755129203Scognet               ) {
756129203Scognet                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
757129203Scognet            }
758129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
759129203Scognet        }
760129203Scognet        if ( zExp <= 0 ) {
761129203Scognet            isTiny =
762129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
763129203Scognet                || ( zExp < 0 )
764129203Scognet                || ! increment
765129203Scognet                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
766129203Scognet            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
767129203Scognet            zExp = 0;
768129203Scognet            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
769129203Scognet            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
770129203Scognet            if ( roundNearestEven ) {
771129203Scognet                increment = ( (sbits64) zSig1 < 0 );
772129203Scognet            }
773129203Scognet            else {
774129203Scognet                if ( zSign ) {
775129203Scognet                    increment = ( roundingMode == float_round_down ) && zSig1;
776129203Scognet                }
777129203Scognet                else {
778129203Scognet                    increment = ( roundingMode == float_round_up ) && zSig1;
779129203Scognet                }
780129203Scognet            }
781129203Scognet            if ( increment ) {
782129203Scognet                ++zSig0;
783129203Scognet                zSig0 &=
784129203Scognet                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
785129203Scognet                if ( (sbits64) zSig0 < 0 ) zExp = 1;
786129203Scognet            }
787129203Scognet            return packFloatx80( zSign, zExp, zSig0 );
788129203Scognet        }
789129203Scognet    }
790129203Scognet    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
791129203Scognet    if ( increment ) {
792129203Scognet        ++zSig0;
793129203Scognet        if ( zSig0 == 0 ) {
794129203Scognet            ++zExp;
795129203Scognet            zSig0 = LIT64( 0x8000000000000000 );
796129203Scognet        }
797129203Scognet        else {
798129203Scognet            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
799129203Scognet        }
800129203Scognet    }
801129203Scognet    else {
802129203Scognet        if ( zSig0 == 0 ) zExp = 0;
803129203Scognet    }
804129203Scognet    return packFloatx80( zSign, zExp, zSig0 );
805129203Scognet
806129203Scognet}
807129203Scognet
808129203Scognet/*
809129203Scognet-------------------------------------------------------------------------------
810129203ScognetTakes an abstract floating-point value having sign `zSign', exponent
811129203Scognet`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
812129203Scognetand returns the proper extended double-precision floating-point value
813129203Scognetcorresponding to the abstract input.  This routine is just like
814129203Scognet`roundAndPackFloatx80' except that the input significand does not have to be
815129203Scognetnormalized.
816129203Scognet-------------------------------------------------------------------------------
817129203Scognet*/
818129203Scognetstatic floatx80
819129203Scognet normalizeRoundAndPackFloatx80(
820129203Scognet     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
821129203Scognet )
822129203Scognet{
823129203Scognet    int8 shiftCount;
824129203Scognet
825129203Scognet    if ( zSig0 == 0 ) {
826129203Scognet        zSig0 = zSig1;
827129203Scognet        zSig1 = 0;
828129203Scognet        zExp -= 64;
829129203Scognet    }
830129203Scognet    shiftCount = countLeadingZeros64( zSig0 );
831129203Scognet    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
832129203Scognet    zExp -= shiftCount;
833129203Scognet    return
834129203Scognet        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
835129203Scognet
836129203Scognet}
837129203Scognet
838129203Scognet#endif
839129203Scognet
840129203Scognet#ifdef FLOAT128
841129203Scognet
842129203Scognet/*
843129203Scognet-------------------------------------------------------------------------------
844129203ScognetReturns the least-significant 64 fraction bits of the quadruple-precision
845129203Scognetfloating-point value `a'.
846129203Scognet-------------------------------------------------------------------------------
847129203Scognet*/
848129203ScognetINLINE bits64 extractFloat128Frac1( float128 a )
849129203Scognet{
850129203Scognet
851129203Scognet    return a.low;
852129203Scognet
853129203Scognet}
854129203Scognet
855129203Scognet/*
856129203Scognet-------------------------------------------------------------------------------
857129203ScognetReturns the most-significant 48 fraction bits of the quadruple-precision
858129203Scognetfloating-point value `a'.
859129203Scognet-------------------------------------------------------------------------------
860129203Scognet*/
861129203ScognetINLINE bits64 extractFloat128Frac0( float128 a )
862129203Scognet{
863129203Scognet
864129203Scognet    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
865129203Scognet
866129203Scognet}
867129203Scognet
868129203Scognet/*
869129203Scognet-------------------------------------------------------------------------------
870129203ScognetReturns the exponent bits of the quadruple-precision floating-point value
871129203Scognet`a'.
872129203Scognet-------------------------------------------------------------------------------
873129203Scognet*/
874129203ScognetINLINE int32 extractFloat128Exp( float128 a )
875129203Scognet{
876129203Scognet
877129203Scognet    return ( a.high>>48 ) & 0x7FFF;
878129203Scognet
879129203Scognet}
880129203Scognet
881129203Scognet/*
882129203Scognet-------------------------------------------------------------------------------
883129203ScognetReturns the sign bit of the quadruple-precision floating-point value `a'.
884129203Scognet-------------------------------------------------------------------------------
885129203Scognet*/
886129203ScognetINLINE flag extractFloat128Sign( float128 a )
887129203Scognet{
888129203Scognet
889129203Scognet    return a.high>>63;
890129203Scognet
891129203Scognet}
892129203Scognet
893129203Scognet/*
894129203Scognet-------------------------------------------------------------------------------
895129203ScognetNormalizes the subnormal quadruple-precision floating-point value
896129203Scognetrepresented by the denormalized significand formed by the concatenation of
897129203Scognet`aSig0' and `aSig1'.  The normalized exponent is stored at the location
898129203Scognetpointed to by `zExpPtr'.  The most significant 49 bits of the normalized
899129203Scognetsignificand are stored at the location pointed to by `zSig0Ptr', and the
900129203Scognetleast significant 64 bits of the normalized significand are stored at the
901129203Scognetlocation pointed to by `zSig1Ptr'.
902129203Scognet-------------------------------------------------------------------------------
903129203Scognet*/
904129203Scognetstatic void
905129203Scognet normalizeFloat128Subnormal(
906129203Scognet     bits64 aSig0,
907129203Scognet     bits64 aSig1,
908129203Scognet     int32 *zExpPtr,
909129203Scognet     bits64 *zSig0Ptr,
910129203Scognet     bits64 *zSig1Ptr
911129203Scognet )
912129203Scognet{
913129203Scognet    int8 shiftCount;
914129203Scognet
915129203Scognet    if ( aSig0 == 0 ) {
916129203Scognet        shiftCount = countLeadingZeros64( aSig1 ) - 15;
917129203Scognet        if ( shiftCount < 0 ) {
918129203Scognet            *zSig0Ptr = aSig1>>( - shiftCount );
919129203Scognet            *zSig1Ptr = aSig1<<( shiftCount & 63 );
920129203Scognet        }
921129203Scognet        else {
922129203Scognet            *zSig0Ptr = aSig1<<shiftCount;
923129203Scognet            *zSig1Ptr = 0;
924129203Scognet        }
925129203Scognet        *zExpPtr = - shiftCount - 63;
926129203Scognet    }
927129203Scognet    else {
928129203Scognet        shiftCount = countLeadingZeros64( aSig0 ) - 15;
929129203Scognet        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
930129203Scognet        *zExpPtr = 1 - shiftCount;
931129203Scognet    }
932129203Scognet
933129203Scognet}
934129203Scognet
935129203Scognet/*
936129203Scognet-------------------------------------------------------------------------------
937129203ScognetPacks the sign `zSign', the exponent `zExp', and the significand formed
938129203Scognetby the concatenation of `zSig0' and `zSig1' into a quadruple-precision
939129203Scognetfloating-point value, returning the result.  After being shifted into the
940129203Scognetproper positions, the three fields `zSign', `zExp', and `zSig0' are simply
941129203Scognetadded together to form the most significant 32 bits of the result.  This
942129203Scognetmeans that any integer portion of `zSig0' will be added into the exponent.
943129203ScognetSince a properly normalized significand will have an integer portion equal
944129203Scognetto 1, the `zExp' input should be 1 less than the desired result exponent
945129203Scognetwhenever `zSig0' and `zSig1' concatenated form a complete, normalized
946129203Scognetsignificand.
947129203Scognet-------------------------------------------------------------------------------
948129203Scognet*/
949129203ScognetINLINE float128
950129203Scognet packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
951129203Scognet{
952129203Scognet    float128 z;
953129203Scognet
954129203Scognet    z.low = zSig1;
955129203Scognet    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
956129203Scognet    return z;
957129203Scognet
958129203Scognet}
959129203Scognet
960129203Scognet/*
961129203Scognet-------------------------------------------------------------------------------
962129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
963129203Scognetand extended significand formed by the concatenation of `zSig0', `zSig1',
964129203Scognetand `zSig2', and returns the proper quadruple-precision floating-point value
965129203Scognetcorresponding to the abstract input.  Ordinarily, the abstract value is
966129203Scognetsimply rounded and packed into the quadruple-precision format, with the
967129203Scognetinexact exception raised if the abstract input cannot be represented
968129203Scognetexactly.  However, if the abstract value is too large, the overflow and
969129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
970129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
971129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
972129203Scognetthe abstract input cannot be represented exactly as a subnormal quadruple-
973129203Scognetprecision floating-point number.
974129203Scognet    The input significand must be normalized or smaller.  If the input
975129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result
976129203Scognetreturned is a subnormal number, and it must not require rounding.  In the
977129203Scognetusual case that the input significand is normalized, `zExp' must be 1 less
978129203Scognetthan the ``true'' floating-point exponent.  The handling of underflow and
979129203Scognetoverflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
980129203Scognet-------------------------------------------------------------------------------
981129203Scognet*/
982129203Scognetstatic float128
983129203Scognet roundAndPackFloat128(
984129203Scognet     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
985129203Scognet{
986129203Scognet    int8 roundingMode;
987129203Scognet    flag roundNearestEven, increment, isTiny;
988129203Scognet
989129203Scognet    roundingMode = float_rounding_mode;
990129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
991129203Scognet    increment = ( (sbits64) zSig2 < 0 );
992129203Scognet    if ( ! roundNearestEven ) {
993129203Scognet        if ( roundingMode == float_round_to_zero ) {
994129203Scognet            increment = 0;
995129203Scognet        }
996129203Scognet        else {
997129203Scognet            if ( zSign ) {
998129203Scognet                increment = ( roundingMode == float_round_down ) && zSig2;
999129203Scognet            }
1000129203Scognet            else {
1001129203Scognet                increment = ( roundingMode == float_round_up ) && zSig2;
1002129203Scognet            }
1003129203Scognet        }
1004129203Scognet    }
1005129203Scognet    if ( 0x7FFD <= (bits32) zExp ) {
1006129203Scognet        if (    ( 0x7FFD < zExp )
1007129203Scognet             || (    ( zExp == 0x7FFD )
1008129203Scognet                  && eq128(
1009129203Scognet                         LIT64( 0x0001FFFFFFFFFFFF ),
1010129203Scognet                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1011129203Scognet                         zSig0,
1012129203Scognet                         zSig1
1013129203Scognet                     )
1014129203Scognet                  && increment
1015129203Scognet                )
1016129203Scognet           ) {
1017129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
1018129203Scognet            if (    ( roundingMode == float_round_to_zero )
1019129203Scognet                 || ( zSign && ( roundingMode == float_round_up ) )
1020129203Scognet                 || ( ! zSign && ( roundingMode == float_round_down ) )
1021129203Scognet               ) {
1022129203Scognet                return
1023129203Scognet                    packFloat128(
1024129203Scognet                        zSign,
1025129203Scognet                        0x7FFE,
1026129203Scognet                        LIT64( 0x0000FFFFFFFFFFFF ),
1027129203Scognet                        LIT64( 0xFFFFFFFFFFFFFFFF )
1028129203Scognet                    );
1029129203Scognet            }
1030129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
1031129203Scognet        }
1032129203Scognet        if ( zExp < 0 ) {
1033129203Scognet            isTiny =
1034129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
1035129203Scognet                || ( zExp < -1 )
1036129203Scognet                || ! increment
1037129203Scognet                || lt128(
1038129203Scognet                       zSig0,
1039129203Scognet                       zSig1,
1040129203Scognet                       LIT64( 0x0001FFFFFFFFFFFF ),
1041129203Scognet                       LIT64( 0xFFFFFFFFFFFFFFFF )
1042129203Scognet                   );
1043129203Scognet            shift128ExtraRightJamming(
1044129203Scognet                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1045129203Scognet            zExp = 0;
1046129203Scognet            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1047129203Scognet            if ( roundNearestEven ) {
1048129203Scognet                increment = ( (sbits64) zSig2 < 0 );
1049129203Scognet            }
1050129203Scognet            else {
1051129203Scognet                if ( zSign ) {
1052129203Scognet                    increment = ( roundingMode == float_round_down ) && zSig2;
1053129203Scognet                }
1054129203Scognet                else {
1055129203Scognet                    increment = ( roundingMode == float_round_up ) && zSig2;
1056129203Scognet                }
1057129203Scognet            }
1058129203Scognet        }
1059129203Scognet    }
1060129203Scognet    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
1061129203Scognet    if ( increment ) {
1062129203Scognet        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1063129203Scognet        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1064129203Scognet    }
1065129203Scognet    else {
1066129203Scognet        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1067129203Scognet    }
1068129203Scognet    return packFloat128( zSign, zExp, zSig0, zSig1 );
1069129203Scognet
1070129203Scognet}
1071129203Scognet
1072129203Scognet/*
1073129203Scognet-------------------------------------------------------------------------------
1074129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
1075129203Scognetand significand formed by the concatenation of `zSig0' and `zSig1', and
1076129203Scognetreturns the proper quadruple-precision floating-point value corresponding
1077129203Scognetto the abstract input.  This routine is just like `roundAndPackFloat128'
1078129203Scognetexcept that the input significand has fewer bits and does not have to be
1079129203Scognetnormalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1080129203Scognetpoint exponent.
1081129203Scognet-------------------------------------------------------------------------------
1082129203Scognet*/
1083129203Scognetstatic float128
1084129203Scognet normalizeRoundAndPackFloat128(
1085129203Scognet     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1086129203Scognet{
1087129203Scognet    int8 shiftCount;
1088129203Scognet    bits64 zSig2;
1089129203Scognet
1090129203Scognet    if ( zSig0 == 0 ) {
1091129203Scognet        zSig0 = zSig1;
1092129203Scognet        zSig1 = 0;
1093129203Scognet        zExp -= 64;
1094129203Scognet    }
1095129203Scognet    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1096129203Scognet    if ( 0 <= shiftCount ) {
1097129203Scognet        zSig2 = 0;
1098129203Scognet        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1099129203Scognet    }
1100129203Scognet    else {
1101129203Scognet        shift128ExtraRightJamming(
1102129203Scognet            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1103129203Scognet    }
1104129203Scognet    zExp -= shiftCount;
1105129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1106129203Scognet
1107129203Scognet}
1108129203Scognet
1109129203Scognet#endif
1110129203Scognet
1111129203Scognet/*
1112129203Scognet-------------------------------------------------------------------------------
1113129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
1114129203Scognetto the single-precision floating-point format.  The conversion is performed
1115129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1116129203Scognet-------------------------------------------------------------------------------
1117129203Scognet*/
1118129203Scognetfloat32 int32_to_float32( int32 a )
1119129203Scognet{
1120129203Scognet    flag zSign;
1121129203Scognet
1122129203Scognet    if ( a == 0 ) return 0;
1123129203Scognet    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1124129203Scognet    zSign = ( a < 0 );
1125129203Scognet    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
1126129203Scognet
1127129203Scognet}
1128129203Scognet
1129129203Scognet/*
1130129203Scognet-------------------------------------------------------------------------------
1131129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
1132129203Scognetto the double-precision floating-point format.  The conversion is performed
1133129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1134129203Scognet-------------------------------------------------------------------------------
1135129203Scognet*/
1136129203Scognetfloat64 int32_to_float64( int32 a )
1137129203Scognet{
1138129203Scognet    flag zSign;
1139129203Scognet    uint32 absA;
1140129203Scognet    int8 shiftCount;
1141129203Scognet    bits64 zSig;
1142129203Scognet
1143129203Scognet    if ( a == 0 ) return 0;
1144129203Scognet    zSign = ( a < 0 );
1145129203Scognet    absA = zSign ? - a : a;
1146129203Scognet    shiftCount = countLeadingZeros32( absA ) + 21;
1147129203Scognet    zSig = absA;
1148129203Scognet    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1149129203Scognet
1150129203Scognet}
1151129203Scognet
1152129203Scognet#ifdef FLOATX80
1153129203Scognet
1154129203Scognet/*
1155129203Scognet-------------------------------------------------------------------------------
1156129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
1157129203Scognetto the extended double-precision floating-point format.  The conversion
1158129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
1159129203ScognetArithmetic.
1160129203Scognet-------------------------------------------------------------------------------
1161129203Scognet*/
1162129203Scognetfloatx80 int32_to_floatx80( int32 a )
1163129203Scognet{
1164129203Scognet    flag zSign;
1165129203Scognet    uint32 absA;
1166129203Scognet    int8 shiftCount;
1167129203Scognet    bits64 zSig;
1168129203Scognet
1169129203Scognet    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1170129203Scognet    zSign = ( a < 0 );
1171129203Scognet    absA = zSign ? - a : a;
1172129203Scognet    shiftCount = countLeadingZeros32( absA ) + 32;
1173129203Scognet    zSig = absA;
1174129203Scognet    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1175129203Scognet
1176129203Scognet}
1177129203Scognet
1178129203Scognet#endif
1179129203Scognet
1180129203Scognet#ifdef FLOAT128
1181129203Scognet
1182129203Scognet/*
1183129203Scognet-------------------------------------------------------------------------------
1184129203ScognetReturns the result of converting the 32-bit two's complement integer `a' to
1185129203Scognetthe quadruple-precision floating-point format.  The conversion is performed
1186129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1187129203Scognet-------------------------------------------------------------------------------
1188129203Scognet*/
1189129203Scognetfloat128 int32_to_float128( int32 a )
1190129203Scognet{
1191129203Scognet    flag zSign;
1192129203Scognet    uint32 absA;
1193129203Scognet    int8 shiftCount;
1194129203Scognet    bits64 zSig0;
1195129203Scognet
1196129203Scognet    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1197129203Scognet    zSign = ( a < 0 );
1198129203Scognet    absA = zSign ? - a : a;
1199129203Scognet    shiftCount = countLeadingZeros32( absA ) + 17;
1200129203Scognet    zSig0 = absA;
1201129203Scognet    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1202129203Scognet
1203129203Scognet}
1204129203Scognet
1205129203Scognet#endif
1206129203Scognet
1207129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1208129203Scognet/*
1209129203Scognet-------------------------------------------------------------------------------
1210129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
1211129203Scognetto the single-precision floating-point format.  The conversion is performed
1212129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1213129203Scognet-------------------------------------------------------------------------------
1214129203Scognet*/
1215129203Scognetfloat32 int64_to_float32( int64 a )
1216129203Scognet{
1217129203Scognet    flag zSign;
1218129203Scognet    uint64 absA;
1219129203Scognet    int8 shiftCount;
1220129203Scognet
1221129203Scognet    if ( a == 0 ) return 0;
1222129203Scognet    zSign = ( a < 0 );
1223129203Scognet    absA = zSign ? - a : a;
1224129203Scognet    shiftCount = countLeadingZeros64( absA ) - 40;
1225129203Scognet    if ( 0 <= shiftCount ) {
1226129203Scognet        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1227129203Scognet    }
1228129203Scognet    else {
1229129203Scognet        shiftCount += 7;
1230129203Scognet        if ( shiftCount < 0 ) {
1231129203Scognet            shift64RightJamming( absA, - shiftCount, &absA );
1232129203Scognet        }
1233129203Scognet        else {
1234129203Scognet            absA <<= shiftCount;
1235129203Scognet        }
1236129203Scognet        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1237129203Scognet    }
1238129203Scognet
1239129203Scognet}
1240129203Scognet
1241129203Scognet/*
1242129203Scognet-------------------------------------------------------------------------------
1243129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
1244129203Scognetto the double-precision floating-point format.  The conversion is performed
1245129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1246129203Scognet-------------------------------------------------------------------------------
1247129203Scognet*/
1248129203Scognetfloat64 int64_to_float64( int64 a )
1249129203Scognet{
1250129203Scognet    flag zSign;
1251129203Scognet
1252129203Scognet    if ( a == 0 ) return 0;
1253129203Scognet    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1254129203Scognet        return packFloat64( 1, 0x43E, 0 );
1255129203Scognet    }
1256129203Scognet    zSign = ( a < 0 );
1257129203Scognet    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1258129203Scognet
1259129203Scognet}
1260129203Scognet
1261129203Scognet#ifdef FLOATX80
1262129203Scognet
1263129203Scognet/*
1264129203Scognet-------------------------------------------------------------------------------
1265129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
1266129203Scognetto the extended double-precision floating-point format.  The conversion
1267129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
1268129203ScognetArithmetic.
1269129203Scognet-------------------------------------------------------------------------------
1270129203Scognet*/
1271129203Scognetfloatx80 int64_to_floatx80( int64 a )
1272129203Scognet{
1273129203Scognet    flag zSign;
1274129203Scognet    uint64 absA;
1275129203Scognet    int8 shiftCount;
1276129203Scognet
1277129203Scognet    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1278129203Scognet    zSign = ( a < 0 );
1279129203Scognet    absA = zSign ? - a : a;
1280129203Scognet    shiftCount = countLeadingZeros64( absA );
1281129203Scognet    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1282129203Scognet
1283129203Scognet}
1284129203Scognet
1285129203Scognet#endif
1286129203Scognet
1287129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1288129203Scognet
1289129203Scognet#ifdef FLOAT128
1290129203Scognet
1291129203Scognet/*
1292129203Scognet-------------------------------------------------------------------------------
1293129203ScognetReturns the result of converting the 64-bit two's complement integer `a' to
1294129203Scognetthe quadruple-precision floating-point format.  The conversion is performed
1295129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1296129203Scognet-------------------------------------------------------------------------------
1297129203Scognet*/
1298129203Scognetfloat128 int64_to_float128( int64 a )
1299129203Scognet{
1300129203Scognet    flag zSign;
1301129203Scognet    uint64 absA;
1302129203Scognet    int8 shiftCount;
1303129203Scognet    int32 zExp;
1304129203Scognet    bits64 zSig0, zSig1;
1305129203Scognet
1306129203Scognet    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1307129203Scognet    zSign = ( a < 0 );
1308129203Scognet    absA = zSign ? - a : a;
1309129203Scognet    shiftCount = countLeadingZeros64( absA ) + 49;
1310129203Scognet    zExp = 0x406E - shiftCount;
1311129203Scognet    if ( 64 <= shiftCount ) {
1312129203Scognet        zSig1 = 0;
1313129203Scognet        zSig0 = absA;
1314129203Scognet        shiftCount -= 64;
1315129203Scognet    }
1316129203Scognet    else {
1317129203Scognet        zSig1 = absA;
1318129203Scognet        zSig0 = 0;
1319129203Scognet    }
1320129203Scognet    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1321129203Scognet    return packFloat128( zSign, zExp, zSig0, zSig1 );
1322129203Scognet
1323129203Scognet}
1324129203Scognet
1325129203Scognet#endif
1326129203Scognet
1327129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1328129203Scognet/*
1329129203Scognet-------------------------------------------------------------------------------
1330129203ScognetReturns the result of converting the single-precision floating-point value
1331129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
1332129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1333129203ScognetArithmetic---which means in particular that the conversion is rounded
1334129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
1335129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
1336129203Scognetlargest integer with the same sign as `a' is returned.
1337129203Scognet-------------------------------------------------------------------------------
1338129203Scognet*/
1339129203Scognetint32 float32_to_int32( float32 a )
1340129203Scognet{
1341129203Scognet    flag aSign;
1342129203Scognet    int16 aExp, shiftCount;
1343129203Scognet    bits32 aSig;
1344129203Scognet    bits64 aSig64;
1345129203Scognet
1346129203Scognet    aSig = extractFloat32Frac( a );
1347129203Scognet    aExp = extractFloat32Exp( a );
1348129203Scognet    aSign = extractFloat32Sign( a );
1349129203Scognet    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1350129203Scognet    if ( aExp ) aSig |= 0x00800000;
1351129203Scognet    shiftCount = 0xAF - aExp;
1352129203Scognet    aSig64 = aSig;
1353129203Scognet    aSig64 <<= 32;
1354129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1355129203Scognet    return roundAndPackInt32( aSign, aSig64 );
1356129203Scognet
1357129203Scognet}
1358129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1359129203Scognet
1360129203Scognet/*
1361129203Scognet-------------------------------------------------------------------------------
1362129203ScognetReturns the result of converting the single-precision floating-point value
1363129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
1364129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1365129203ScognetArithmetic, except that the conversion is always rounded toward zero.
1366129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1367129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
1368129203Scognetreturned.
1369129203Scognet-------------------------------------------------------------------------------
1370129203Scognet*/
1371129203Scognetint32 float32_to_int32_round_to_zero( float32 a )
1372129203Scognet{
1373129203Scognet    flag aSign;
1374129203Scognet    int16 aExp, shiftCount;
1375129203Scognet    bits32 aSig;
1376129203Scognet    int32 z;
1377129203Scognet
1378129203Scognet    aSig = extractFloat32Frac( a );
1379129203Scognet    aExp = extractFloat32Exp( a );
1380129203Scognet    aSign = extractFloat32Sign( a );
1381129203Scognet    shiftCount = aExp - 0x9E;
1382129203Scognet    if ( 0 <= shiftCount ) {
1383129203Scognet        if ( a != 0xCF000000 ) {
1384129203Scognet            float_raise( float_flag_invalid );
1385129203Scognet            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1386129203Scognet        }
1387129203Scognet        return (sbits32) 0x80000000;
1388129203Scognet    }
1389129203Scognet    else if ( aExp <= 0x7E ) {
1390129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
1391129203Scognet        return 0;
1392129203Scognet    }
1393129203Scognet    aSig = ( aSig | 0x00800000 )<<8;
1394129203Scognet    z = aSig>>( - shiftCount );
1395129203Scognet    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1396129203Scognet        float_exception_flags |= float_flag_inexact;
1397129203Scognet    }
1398129203Scognet    if ( aSign ) z = - z;
1399129203Scognet    return z;
1400129203Scognet
1401129203Scognet}
1402129203Scognet
1403129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1404129203Scognet/*
1405129203Scognet-------------------------------------------------------------------------------
1406129203ScognetReturns the result of converting the single-precision floating-point value
1407129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
1408129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1409129203ScognetArithmetic---which means in particular that the conversion is rounded
1410129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
1411129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
1412129203Scognetlargest integer with the same sign as `a' is returned.
1413129203Scognet-------------------------------------------------------------------------------
1414129203Scognet*/
1415129203Scognetint64 float32_to_int64( float32 a )
1416129203Scognet{
1417129203Scognet    flag aSign;
1418129203Scognet    int16 aExp, shiftCount;
1419129203Scognet    bits32 aSig;
1420129203Scognet    bits64 aSig64, aSigExtra;
1421129203Scognet
1422129203Scognet    aSig = extractFloat32Frac( a );
1423129203Scognet    aExp = extractFloat32Exp( a );
1424129203Scognet    aSign = extractFloat32Sign( a );
1425129203Scognet    shiftCount = 0xBE - aExp;
1426129203Scognet    if ( shiftCount < 0 ) {
1427129203Scognet        float_raise( float_flag_invalid );
1428129203Scognet        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1429129203Scognet            return LIT64( 0x7FFFFFFFFFFFFFFF );
1430129203Scognet        }
1431129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
1432129203Scognet    }
1433129203Scognet    if ( aExp ) aSig |= 0x00800000;
1434129203Scognet    aSig64 = aSig;
1435129203Scognet    aSig64 <<= 40;
1436129203Scognet    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1437129203Scognet    return roundAndPackInt64( aSign, aSig64, aSigExtra );
1438129203Scognet
1439129203Scognet}
1440129203Scognet
1441129203Scognet/*
1442129203Scognet-------------------------------------------------------------------------------
1443129203ScognetReturns the result of converting the single-precision floating-point value
1444129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
1445129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1446129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
1447129203Scognet`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1448129203Scognetconversion overflows, the largest integer with the same sign as `a' is
1449129203Scognetreturned.
1450129203Scognet-------------------------------------------------------------------------------
1451129203Scognet*/
1452129203Scognetint64 float32_to_int64_round_to_zero( float32 a )
1453129203Scognet{
1454129203Scognet    flag aSign;
1455129203Scognet    int16 aExp, shiftCount;
1456129203Scognet    bits32 aSig;
1457129203Scognet    bits64 aSig64;
1458129203Scognet    int64 z;
1459129203Scognet
1460129203Scognet    aSig = extractFloat32Frac( a );
1461129203Scognet    aExp = extractFloat32Exp( a );
1462129203Scognet    aSign = extractFloat32Sign( a );
1463129203Scognet    shiftCount = aExp - 0xBE;
1464129203Scognet    if ( 0 <= shiftCount ) {
1465129203Scognet        if ( a != 0xDF000000 ) {
1466129203Scognet            float_raise( float_flag_invalid );
1467129203Scognet            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1468129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
1469129203Scognet            }
1470129203Scognet        }
1471129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
1472129203Scognet    }
1473129203Scognet    else if ( aExp <= 0x7E ) {
1474129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
1475129203Scognet        return 0;
1476129203Scognet    }
1477129203Scognet    aSig64 = aSig | 0x00800000;
1478129203Scognet    aSig64 <<= 40;
1479129203Scognet    z = aSig64>>( - shiftCount );
1480129203Scognet    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1481129203Scognet        float_exception_flags |= float_flag_inexact;
1482129203Scognet    }
1483129203Scognet    if ( aSign ) z = - z;
1484129203Scognet    return z;
1485129203Scognet
1486129203Scognet}
1487129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1488129203Scognet
1489129203Scognet/*
1490129203Scognet-------------------------------------------------------------------------------
1491129203ScognetReturns the result of converting the single-precision floating-point value
1492129203Scognet`a' to the double-precision floating-point format.  The conversion is
1493129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1494129203ScognetArithmetic.
1495129203Scognet-------------------------------------------------------------------------------
1496129203Scognet*/
1497129203Scognetfloat64 float32_to_float64( float32 a )
1498129203Scognet{
1499129203Scognet    flag aSign;
1500129203Scognet    int16 aExp;
1501129203Scognet    bits32 aSig;
1502129203Scognet
1503129203Scognet    aSig = extractFloat32Frac( a );
1504129203Scognet    aExp = extractFloat32Exp( a );
1505129203Scognet    aSign = extractFloat32Sign( a );
1506129203Scognet    if ( aExp == 0xFF ) {
1507129203Scognet        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1508129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
1509129203Scognet    }
1510129203Scognet    if ( aExp == 0 ) {
1511129203Scognet        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1512129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1513129203Scognet        --aExp;
1514129203Scognet    }
1515129203Scognet    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1516129203Scognet
1517129203Scognet}
1518129203Scognet
1519129203Scognet#ifdef FLOATX80
1520129203Scognet
1521129203Scognet/*
1522129203Scognet-------------------------------------------------------------------------------
1523129203ScognetReturns the result of converting the single-precision floating-point value
1524129203Scognet`a' to the extended double-precision floating-point format.  The conversion
1525129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
1526129203ScognetArithmetic.
1527129203Scognet-------------------------------------------------------------------------------
1528129203Scognet*/
1529129203Scognetfloatx80 float32_to_floatx80( float32 a )
1530129203Scognet{
1531129203Scognet    flag aSign;
1532129203Scognet    int16 aExp;
1533129203Scognet    bits32 aSig;
1534129203Scognet
1535129203Scognet    aSig = extractFloat32Frac( a );
1536129203Scognet    aExp = extractFloat32Exp( a );
1537129203Scognet    aSign = extractFloat32Sign( a );
1538129203Scognet    if ( aExp == 0xFF ) {
1539129203Scognet        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1540129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1541129203Scognet    }
1542129203Scognet    if ( aExp == 0 ) {
1543129203Scognet        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1544129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1545129203Scognet    }
1546129203Scognet    aSig |= 0x00800000;
1547129203Scognet    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1548129203Scognet
1549129203Scognet}
1550129203Scognet
1551129203Scognet#endif
1552129203Scognet
1553129203Scognet#ifdef FLOAT128
1554129203Scognet
1555129203Scognet/*
1556129203Scognet-------------------------------------------------------------------------------
1557129203ScognetReturns the result of converting the single-precision floating-point value
1558129203Scognet`a' to the double-precision floating-point format.  The conversion is
1559129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
1560129203ScognetArithmetic.
1561129203Scognet-------------------------------------------------------------------------------
1562129203Scognet*/
1563129203Scognetfloat128 float32_to_float128( float32 a )
1564129203Scognet{
1565129203Scognet    flag aSign;
1566129203Scognet    int16 aExp;
1567129203Scognet    bits32 aSig;
1568129203Scognet
1569129203Scognet    aSig = extractFloat32Frac( a );
1570129203Scognet    aExp = extractFloat32Exp( a );
1571129203Scognet    aSign = extractFloat32Sign( a );
1572129203Scognet    if ( aExp == 0xFF ) {
1573129203Scognet        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1574129203Scognet        return packFloat128( aSign, 0x7FFF, 0, 0 );
1575129203Scognet    }
1576129203Scognet    if ( aExp == 0 ) {
1577129203Scognet        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1578129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1579129203Scognet        --aExp;
1580129203Scognet    }
1581129203Scognet    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1582129203Scognet
1583129203Scognet}
1584129203Scognet
1585129203Scognet#endif
1586129203Scognet
1587129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1588129203Scognet/*
1589129203Scognet-------------------------------------------------------------------------------
1590129203ScognetRounds the single-precision floating-point value `a' to an integer, and
1591129203Scognetreturns the result as a single-precision floating-point value.  The
1592129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
1593129203ScognetFloating-Point Arithmetic.
1594129203Scognet-------------------------------------------------------------------------------
1595129203Scognet*/
1596129203Scognetfloat32 float32_round_to_int( float32 a )
1597129203Scognet{
1598129203Scognet    flag aSign;
1599129203Scognet    int16 aExp;
1600129203Scognet    bits32 lastBitMask, roundBitsMask;
1601129203Scognet    int8 roundingMode;
1602129203Scognet    float32 z;
1603129203Scognet
1604129203Scognet    aExp = extractFloat32Exp( a );
1605129203Scognet    if ( 0x96 <= aExp ) {
1606129203Scognet        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1607129203Scognet            return propagateFloat32NaN( a, a );
1608129203Scognet        }
1609129203Scognet        return a;
1610129203Scognet    }
1611129203Scognet    if ( aExp <= 0x7E ) {
1612129203Scognet        if ( (bits32) ( a<<1 ) == 0 ) return a;
1613129203Scognet        float_exception_flags |= float_flag_inexact;
1614129203Scognet        aSign = extractFloat32Sign( a );
1615129203Scognet        switch ( float_rounding_mode ) {
1616129203Scognet         case float_round_nearest_even:
1617129203Scognet            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1618129203Scognet                return packFloat32( aSign, 0x7F, 0 );
1619129203Scognet            }
1620129203Scognet            break;
1621129203Scognet	 case float_round_to_zero:
1622129203Scognet	    break;
1623129203Scognet         case float_round_down:
1624129203Scognet            return aSign ? 0xBF800000 : 0;
1625129203Scognet         case float_round_up:
1626129203Scognet            return aSign ? 0x80000000 : 0x3F800000;
1627129203Scognet        }
1628129203Scognet        return packFloat32( aSign, 0, 0 );
1629129203Scognet    }
1630129203Scognet    lastBitMask = 1;
1631129203Scognet    lastBitMask <<= 0x96 - aExp;
1632129203Scognet    roundBitsMask = lastBitMask - 1;
1633129203Scognet    z = a;
1634129203Scognet    roundingMode = float_rounding_mode;
1635129203Scognet    if ( roundingMode == float_round_nearest_even ) {
1636129203Scognet        z += lastBitMask>>1;
1637129203Scognet        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1638129203Scognet    }
1639129203Scognet    else if ( roundingMode != float_round_to_zero ) {
1640129203Scognet        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1641129203Scognet            z += roundBitsMask;
1642129203Scognet        }
1643129203Scognet    }
1644129203Scognet    z &= ~ roundBitsMask;
1645129203Scognet    if ( z != a ) float_exception_flags |= float_flag_inexact;
1646129203Scognet    return z;
1647129203Scognet
1648129203Scognet}
1649129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
1650129203Scognet
1651129203Scognet/*
1652129203Scognet-------------------------------------------------------------------------------
1653129203ScognetReturns the result of adding the absolute values of the single-precision
1654129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1655129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
1656129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
1657129203ScognetFloating-Point Arithmetic.
1658129203Scognet-------------------------------------------------------------------------------
1659129203Scognet*/
1660129203Scognetstatic float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1661129203Scognet{
1662129203Scognet    int16 aExp, bExp, zExp;
1663129203Scognet    bits32 aSig, bSig, zSig;
1664129203Scognet    int16 expDiff;
1665129203Scognet
1666129203Scognet    aSig = extractFloat32Frac( a );
1667129203Scognet    aExp = extractFloat32Exp( a );
1668129203Scognet    bSig = extractFloat32Frac( b );
1669129203Scognet    bExp = extractFloat32Exp( b );
1670129203Scognet    expDiff = aExp - bExp;
1671129203Scognet    aSig <<= 6;
1672129203Scognet    bSig <<= 6;
1673129203Scognet    if ( 0 < expDiff ) {
1674129203Scognet        if ( aExp == 0xFF ) {
1675129203Scognet            if ( aSig ) return propagateFloat32NaN( a, b );
1676129203Scognet            return a;
1677129203Scognet        }
1678129203Scognet        if ( bExp == 0 ) {
1679129203Scognet            --expDiff;
1680129203Scognet        }
1681129203Scognet        else {
1682129203Scognet            bSig |= 0x20000000;
1683129203Scognet        }
1684129203Scognet        shift32RightJamming( bSig, expDiff, &bSig );
1685129203Scognet        zExp = aExp;
1686129203Scognet    }
1687129203Scognet    else if ( expDiff < 0 ) {
1688129203Scognet        if ( bExp == 0xFF ) {
1689129203Scognet            if ( bSig ) return propagateFloat32NaN( a, b );
1690129203Scognet            return packFloat32( zSign, 0xFF, 0 );
1691129203Scognet        }
1692129203Scognet        if ( aExp == 0 ) {
1693129203Scognet            ++expDiff;
1694129203Scognet        }
1695129203Scognet        else {
1696129203Scognet            aSig |= 0x20000000;
1697129203Scognet        }
1698129203Scognet        shift32RightJamming( aSig, - expDiff, &aSig );
1699129203Scognet        zExp = bExp;
1700129203Scognet    }
1701129203Scognet    else {
1702129203Scognet        if ( aExp == 0xFF ) {
1703129203Scognet            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1704129203Scognet            return a;
1705129203Scognet        }
1706129203Scognet        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1707129203Scognet        zSig = 0x40000000 + aSig + bSig;
1708129203Scognet        zExp = aExp;
1709129203Scognet        goto roundAndPack;
1710129203Scognet    }
1711129203Scognet    aSig |= 0x20000000;
1712129203Scognet    zSig = ( aSig + bSig )<<1;
1713129203Scognet    --zExp;
1714129203Scognet    if ( (sbits32) zSig < 0 ) {
1715129203Scognet        zSig = aSig + bSig;
1716129203Scognet        ++zExp;
1717129203Scognet    }
1718129203Scognet roundAndPack:
1719129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
1720129203Scognet
1721129203Scognet}
1722129203Scognet
1723129203Scognet/*
1724129203Scognet-------------------------------------------------------------------------------
1725129203ScognetReturns the result of subtracting the absolute values of the single-
1726129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
1727129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
1728129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
1729129203ScognetStandard for Binary Floating-Point Arithmetic.
1730129203Scognet-------------------------------------------------------------------------------
1731129203Scognet*/
1732129203Scognetstatic float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1733129203Scognet{
1734129203Scognet    int16 aExp, bExp, zExp;
1735129203Scognet    bits32 aSig, bSig, zSig;
1736129203Scognet    int16 expDiff;
1737129203Scognet
1738129203Scognet    aSig = extractFloat32Frac( a );
1739129203Scognet    aExp = extractFloat32Exp( a );
1740129203Scognet    bSig = extractFloat32Frac( b );
1741129203Scognet    bExp = extractFloat32Exp( b );
1742129203Scognet    expDiff = aExp - bExp;
1743129203Scognet    aSig <<= 7;
1744129203Scognet    bSig <<= 7;
1745129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
1746129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
1747129203Scognet    if ( aExp == 0xFF ) {
1748129203Scognet        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1749129203Scognet        float_raise( float_flag_invalid );
1750129203Scognet        return float32_default_nan;
1751129203Scognet    }
1752129203Scognet    if ( aExp == 0 ) {
1753129203Scognet        aExp = 1;
1754129203Scognet        bExp = 1;
1755129203Scognet    }
1756129203Scognet    if ( bSig < aSig ) goto aBigger;
1757129203Scognet    if ( aSig < bSig ) goto bBigger;
1758129203Scognet    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
1759129203Scognet bExpBigger:
1760129203Scognet    if ( bExp == 0xFF ) {
1761129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
1762129203Scognet        return packFloat32( zSign ^ 1, 0xFF, 0 );
1763129203Scognet    }
1764129203Scognet    if ( aExp == 0 ) {
1765129203Scognet        ++expDiff;
1766129203Scognet    }
1767129203Scognet    else {
1768129203Scognet        aSig |= 0x40000000;
1769129203Scognet    }
1770129203Scognet    shift32RightJamming( aSig, - expDiff, &aSig );
1771129203Scognet    bSig |= 0x40000000;
1772129203Scognet bBigger:
1773129203Scognet    zSig = bSig - aSig;
1774129203Scognet    zExp = bExp;
1775129203Scognet    zSign ^= 1;
1776129203Scognet    goto normalizeRoundAndPack;
1777129203Scognet aExpBigger:
1778129203Scognet    if ( aExp == 0xFF ) {
1779129203Scognet        if ( aSig ) return propagateFloat32NaN( a, b );
1780129203Scognet        return a;
1781129203Scognet    }
1782129203Scognet    if ( bExp == 0 ) {
1783129203Scognet        --expDiff;
1784129203Scognet    }
1785129203Scognet    else {
1786129203Scognet        bSig |= 0x40000000;
1787129203Scognet    }
1788129203Scognet    shift32RightJamming( bSig, expDiff, &bSig );
1789129203Scognet    aSig |= 0x40000000;
1790129203Scognet aBigger:
1791129203Scognet    zSig = aSig - bSig;
1792129203Scognet    zExp = aExp;
1793129203Scognet normalizeRoundAndPack:
1794129203Scognet    --zExp;
1795129203Scognet    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1796129203Scognet
1797129203Scognet}
1798129203Scognet
1799129203Scognet/*
1800129203Scognet-------------------------------------------------------------------------------
1801129203ScognetReturns the result of adding the single-precision floating-point values `a'
1802129203Scognetand `b'.  The operation is performed according to the IEC/IEEE Standard for
1803129203ScognetBinary Floating-Point Arithmetic.
1804129203Scognet-------------------------------------------------------------------------------
1805129203Scognet*/
1806129203Scognetfloat32 float32_add( float32 a, float32 b )
1807129203Scognet{
1808129203Scognet    flag aSign, bSign;
1809129203Scognet
1810129203Scognet    aSign = extractFloat32Sign( a );
1811129203Scognet    bSign = extractFloat32Sign( b );
1812129203Scognet    if ( aSign == bSign ) {
1813129203Scognet        return addFloat32Sigs( a, b, aSign );
1814129203Scognet    }
1815129203Scognet    else {
1816129203Scognet        return subFloat32Sigs( a, b, aSign );
1817129203Scognet    }
1818129203Scognet
1819129203Scognet}
1820129203Scognet
1821129203Scognet/*
1822129203Scognet-------------------------------------------------------------------------------
1823129203ScognetReturns the result of subtracting the single-precision floating-point values
1824129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1825129203Scognetfor Binary Floating-Point Arithmetic.
1826129203Scognet-------------------------------------------------------------------------------
1827129203Scognet*/
1828129203Scognetfloat32 float32_sub( float32 a, float32 b )
1829129203Scognet{
1830129203Scognet    flag aSign, bSign;
1831129203Scognet
1832129203Scognet    aSign = extractFloat32Sign( a );
1833129203Scognet    bSign = extractFloat32Sign( b );
1834129203Scognet    if ( aSign == bSign ) {
1835129203Scognet        return subFloat32Sigs( a, b, aSign );
1836129203Scognet    }
1837129203Scognet    else {
1838129203Scognet        return addFloat32Sigs( a, b, aSign );
1839129203Scognet    }
1840129203Scognet
1841129203Scognet}
1842129203Scognet
1843129203Scognet/*
1844129203Scognet-------------------------------------------------------------------------------
1845129203ScognetReturns the result of multiplying the single-precision floating-point values
1846129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1847129203Scognetfor Binary Floating-Point Arithmetic.
1848129203Scognet-------------------------------------------------------------------------------
1849129203Scognet*/
1850129203Scognetfloat32 float32_mul( float32 a, float32 b )
1851129203Scognet{
1852129203Scognet    flag aSign, bSign, zSign;
1853129203Scognet    int16 aExp, bExp, zExp;
1854129203Scognet    bits32 aSig, bSig;
1855129203Scognet    bits64 zSig64;
1856129203Scognet    bits32 zSig;
1857129203Scognet
1858129203Scognet    aSig = extractFloat32Frac( a );
1859129203Scognet    aExp = extractFloat32Exp( a );
1860129203Scognet    aSign = extractFloat32Sign( a );
1861129203Scognet    bSig = extractFloat32Frac( b );
1862129203Scognet    bExp = extractFloat32Exp( b );
1863129203Scognet    bSign = extractFloat32Sign( b );
1864129203Scognet    zSign = aSign ^ bSign;
1865129203Scognet    if ( aExp == 0xFF ) {
1866129203Scognet        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1867129203Scognet            return propagateFloat32NaN( a, b );
1868129203Scognet        }
1869129203Scognet        if ( ( bExp | bSig ) == 0 ) {
1870129203Scognet            float_raise( float_flag_invalid );
1871129203Scognet            return float32_default_nan;
1872129203Scognet        }
1873129203Scognet        return packFloat32( zSign, 0xFF, 0 );
1874129203Scognet    }
1875129203Scognet    if ( bExp == 0xFF ) {
1876129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
1877129203Scognet        if ( ( aExp | aSig ) == 0 ) {
1878129203Scognet            float_raise( float_flag_invalid );
1879129203Scognet            return float32_default_nan;
1880129203Scognet        }
1881129203Scognet        return packFloat32( zSign, 0xFF, 0 );
1882129203Scognet    }
1883129203Scognet    if ( aExp == 0 ) {
1884129203Scognet        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1885129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1886129203Scognet    }
1887129203Scognet    if ( bExp == 0 ) {
1888129203Scognet        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1889129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1890129203Scognet    }
1891129203Scognet    zExp = aExp + bExp - 0x7F;
1892129203Scognet    aSig = ( aSig | 0x00800000 )<<7;
1893129203Scognet    bSig = ( bSig | 0x00800000 )<<8;
1894129203Scognet    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1895129203Scognet    zSig = zSig64;
1896129203Scognet    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1897129203Scognet        zSig <<= 1;
1898129203Scognet        --zExp;
1899129203Scognet    }
1900129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
1901129203Scognet
1902129203Scognet}
1903129203Scognet
1904129203Scognet/*
1905129203Scognet-------------------------------------------------------------------------------
1906129203ScognetReturns the result of dividing the single-precision floating-point value `a'
1907129203Scognetby the corresponding value `b'.  The operation is performed according to the
1908129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
1909129203Scognet-------------------------------------------------------------------------------
1910129203Scognet*/
1911129203Scognetfloat32 float32_div( float32 a, float32 b )
1912129203Scognet{
1913129203Scognet    flag aSign, bSign, zSign;
1914129203Scognet    int16 aExp, bExp, zExp;
1915129203Scognet    bits32 aSig, bSig, zSig;
1916129203Scognet
1917129203Scognet    aSig = extractFloat32Frac( a );
1918129203Scognet    aExp = extractFloat32Exp( a );
1919129203Scognet    aSign = extractFloat32Sign( a );
1920129203Scognet    bSig = extractFloat32Frac( b );
1921129203Scognet    bExp = extractFloat32Exp( b );
1922129203Scognet    bSign = extractFloat32Sign( b );
1923129203Scognet    zSign = aSign ^ bSign;
1924129203Scognet    if ( aExp == 0xFF ) {
1925129203Scognet        if ( aSig ) return propagateFloat32NaN( a, b );
1926129203Scognet        if ( bExp == 0xFF ) {
1927129203Scognet            if ( bSig ) return propagateFloat32NaN( a, b );
1928129203Scognet            float_raise( float_flag_invalid );
1929129203Scognet            return float32_default_nan;
1930129203Scognet        }
1931129203Scognet        return packFloat32( zSign, 0xFF, 0 );
1932129203Scognet    }
1933129203Scognet    if ( bExp == 0xFF ) {
1934129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
1935129203Scognet        return packFloat32( zSign, 0, 0 );
1936129203Scognet    }
1937129203Scognet    if ( bExp == 0 ) {
1938129203Scognet        if ( bSig == 0 ) {
1939129203Scognet            if ( ( aExp | aSig ) == 0 ) {
1940129203Scognet                float_raise( float_flag_invalid );
1941129203Scognet                return float32_default_nan;
1942129203Scognet            }
1943129203Scognet            float_raise( float_flag_divbyzero );
1944129203Scognet            return packFloat32( zSign, 0xFF, 0 );
1945129203Scognet        }
1946129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1947129203Scognet    }
1948129203Scognet    if ( aExp == 0 ) {
1949129203Scognet        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1950129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1951129203Scognet    }
1952129203Scognet    zExp = aExp - bExp + 0x7D;
1953129203Scognet    aSig = ( aSig | 0x00800000 )<<7;
1954129203Scognet    bSig = ( bSig | 0x00800000 )<<8;
1955129203Scognet    if ( bSig <= ( aSig + aSig ) ) {
1956129203Scognet        aSig >>= 1;
1957129203Scognet        ++zExp;
1958129203Scognet    }
1959129203Scognet    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
1960129203Scognet    if ( ( zSig & 0x3F ) == 0 ) {
1961129203Scognet        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
1962129203Scognet    }
1963129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
1964129203Scognet
1965129203Scognet}
1966129203Scognet
1967129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1968129203Scognet/*
1969129203Scognet-------------------------------------------------------------------------------
1970129203ScognetReturns the remainder of the single-precision floating-point value `a'
1971129203Scognetwith respect to the corresponding value `b'.  The operation is performed
1972129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1973129203Scognet-------------------------------------------------------------------------------
1974129203Scognet*/
1975129203Scognetfloat32 float32_rem( float32 a, float32 b )
1976129203Scognet{
1977129203Scognet    flag aSign, bSign, zSign;
1978129203Scognet    int16 aExp, bExp, expDiff;
1979129203Scognet    bits32 aSig, bSig;
1980129203Scognet    bits32 q;
1981129203Scognet    bits64 aSig64, bSig64, q64;
1982129203Scognet    bits32 alternateASig;
1983129203Scognet    sbits32 sigMean;
1984129203Scognet
1985129203Scognet    aSig = extractFloat32Frac( a );
1986129203Scognet    aExp = extractFloat32Exp( a );
1987129203Scognet    aSign = extractFloat32Sign( a );
1988129203Scognet    bSig = extractFloat32Frac( b );
1989129203Scognet    bExp = extractFloat32Exp( b );
1990129203Scognet    bSign = extractFloat32Sign( b );
1991129203Scognet    if ( aExp == 0xFF ) {
1992129203Scognet        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1993129203Scognet            return propagateFloat32NaN( a, b );
1994129203Scognet        }
1995129203Scognet        float_raise( float_flag_invalid );
1996129203Scognet        return float32_default_nan;
1997129203Scognet    }
1998129203Scognet    if ( bExp == 0xFF ) {
1999129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
2000129203Scognet        return a;
2001129203Scognet    }
2002129203Scognet    if ( bExp == 0 ) {
2003129203Scognet        if ( bSig == 0 ) {
2004129203Scognet            float_raise( float_flag_invalid );
2005129203Scognet            return float32_default_nan;
2006129203Scognet        }
2007129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2008129203Scognet    }
2009129203Scognet    if ( aExp == 0 ) {
2010129203Scognet        if ( aSig == 0 ) return a;
2011129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2012129203Scognet    }
2013129203Scognet    expDiff = aExp - bExp;
2014129203Scognet    aSig |= 0x00800000;
2015129203Scognet    bSig |= 0x00800000;
2016129203Scognet    if ( expDiff < 32 ) {
2017129203Scognet        aSig <<= 8;
2018129203Scognet        bSig <<= 8;
2019129203Scognet        if ( expDiff < 0 ) {
2020129203Scognet            if ( expDiff < -1 ) return a;
2021129203Scognet            aSig >>= 1;
2022129203Scognet        }
2023129203Scognet        q = ( bSig <= aSig );
2024129203Scognet        if ( q ) aSig -= bSig;
2025129203Scognet        if ( 0 < expDiff ) {
2026129203Scognet            q = ( ( (bits64) aSig )<<32 ) / bSig;
2027129203Scognet            q >>= 32 - expDiff;
2028129203Scognet            bSig >>= 2;
2029129203Scognet            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2030129203Scognet        }
2031129203Scognet        else {
2032129203Scognet            aSig >>= 2;
2033129203Scognet            bSig >>= 2;
2034129203Scognet        }
2035129203Scognet    }
2036129203Scognet    else {
2037129203Scognet        if ( bSig <= aSig ) aSig -= bSig;
2038129203Scognet        aSig64 = ( (bits64) aSig )<<40;
2039129203Scognet        bSig64 = ( (bits64) bSig )<<40;
2040129203Scognet        expDiff -= 64;
2041129203Scognet        while ( 0 < expDiff ) {
2042129203Scognet            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2043129203Scognet            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2044129203Scognet            aSig64 = - ( ( bSig * q64 )<<38 );
2045129203Scognet            expDiff -= 62;
2046129203Scognet        }
2047129203Scognet        expDiff += 64;
2048129203Scognet        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2049129203Scognet        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2050129203Scognet        q = q64>>( 64 - expDiff );
2051129203Scognet        bSig <<= 6;
2052129203Scognet        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2053129203Scognet    }
2054129203Scognet    do {
2055129203Scognet        alternateASig = aSig;
2056129203Scognet        ++q;
2057129203Scognet        aSig -= bSig;
2058129203Scognet    } while ( 0 <= (sbits32) aSig );
2059129203Scognet    sigMean = aSig + alternateASig;
2060129203Scognet    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2061129203Scognet        aSig = alternateASig;
2062129203Scognet    }
2063129203Scognet    zSign = ( (sbits32) aSig < 0 );
2064129203Scognet    if ( zSign ) aSig = - aSig;
2065129203Scognet    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2066129203Scognet
2067129203Scognet}
2068129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2069129203Scognet
2070129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2071129203Scognet/*
2072129203Scognet-------------------------------------------------------------------------------
2073129203ScognetReturns the square root of the single-precision floating-point value `a'.
2074129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
2075129203ScognetFloating-Point Arithmetic.
2076129203Scognet-------------------------------------------------------------------------------
2077129203Scognet*/
2078129203Scognetfloat32 float32_sqrt( float32 a )
2079129203Scognet{
2080129203Scognet    flag aSign;
2081129203Scognet    int16 aExp, zExp;
2082129203Scognet    bits32 aSig, zSig;
2083129203Scognet    bits64 rem, term;
2084129203Scognet
2085129203Scognet    aSig = extractFloat32Frac( a );
2086129203Scognet    aExp = extractFloat32Exp( a );
2087129203Scognet    aSign = extractFloat32Sign( a );
2088129203Scognet    if ( aExp == 0xFF ) {
2089129203Scognet        if ( aSig ) return propagateFloat32NaN( a, 0 );
2090129203Scognet        if ( ! aSign ) return a;
2091129203Scognet        float_raise( float_flag_invalid );
2092129203Scognet        return float32_default_nan;
2093129203Scognet    }
2094129203Scognet    if ( aSign ) {
2095129203Scognet        if ( ( aExp | aSig ) == 0 ) return a;
2096129203Scognet        float_raise( float_flag_invalid );
2097129203Scognet        return float32_default_nan;
2098129203Scognet    }
2099129203Scognet    if ( aExp == 0 ) {
2100129203Scognet        if ( aSig == 0 ) return 0;
2101129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2102129203Scognet    }
2103129203Scognet    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2104129203Scognet    aSig = ( aSig | 0x00800000 )<<8;
2105129203Scognet    zSig = estimateSqrt32( aExp, aSig ) + 2;
2106129203Scognet    if ( ( zSig & 0x7F ) <= 5 ) {
2107129203Scognet        if ( zSig < 2 ) {
2108129203Scognet            zSig = 0x7FFFFFFF;
2109129203Scognet            goto roundAndPack;
2110129203Scognet        }
2111129203Scognet        aSig >>= aExp & 1;
2112129203Scognet        term = ( (bits64) zSig ) * zSig;
2113129203Scognet        rem = ( ( (bits64) aSig )<<32 ) - term;
2114129203Scognet        while ( (sbits64) rem < 0 ) {
2115129203Scognet            --zSig;
2116129203Scognet            rem += ( ( (bits64) zSig )<<1 ) | 1;
2117129203Scognet        }
2118129203Scognet        zSig |= ( rem != 0 );
2119129203Scognet    }
2120129203Scognet    shift32RightJamming( zSig, 1, &zSig );
2121129203Scognet roundAndPack:
2122129203Scognet    return roundAndPackFloat32( 0, zExp, zSig );
2123129203Scognet
2124129203Scognet}
2125129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2126129203Scognet
2127129203Scognet/*
2128129203Scognet-------------------------------------------------------------------------------
2129129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to
2130129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
2131129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2132129203Scognet-------------------------------------------------------------------------------
2133129203Scognet*/
2134129203Scognetflag float32_eq( float32 a, float32 b )
2135129203Scognet{
2136129203Scognet
2137129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2138129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2139129203Scognet       ) {
2140129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2141129203Scognet            float_raise( float_flag_invalid );
2142129203Scognet        }
2143129203Scognet        return 0;
2144129203Scognet    }
2145129203Scognet    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2146129203Scognet
2147129203Scognet}
2148129203Scognet
2149129203Scognet/*
2150129203Scognet-------------------------------------------------------------------------------
2151129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
2152129203Scognetor equal to the corresponding value `b', and 0 otherwise.  The comparison
2153129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
2154129203ScognetArithmetic.
2155129203Scognet-------------------------------------------------------------------------------
2156129203Scognet*/
2157129203Scognetflag float32_le( float32 a, float32 b )
2158129203Scognet{
2159129203Scognet    flag aSign, bSign;
2160129203Scognet
2161129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2162129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2163129203Scognet       ) {
2164129203Scognet        float_raise( float_flag_invalid );
2165129203Scognet        return 0;
2166129203Scognet    }
2167129203Scognet    aSign = extractFloat32Sign( a );
2168129203Scognet    bSign = extractFloat32Sign( b );
2169129203Scognet    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2170129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
2171129203Scognet
2172129203Scognet}
2173129203Scognet
2174129203Scognet/*
2175129203Scognet-------------------------------------------------------------------------------
2176129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
2177129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
2178129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2179129203Scognet-------------------------------------------------------------------------------
2180129203Scognet*/
2181129203Scognetflag float32_lt( float32 a, float32 b )
2182129203Scognet{
2183129203Scognet    flag aSign, bSign;
2184129203Scognet
2185129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2186129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2187129203Scognet       ) {
2188129203Scognet        float_raise( float_flag_invalid );
2189129203Scognet        return 0;
2190129203Scognet    }
2191129203Scognet    aSign = extractFloat32Sign( a );
2192129203Scognet    bSign = extractFloat32Sign( b );
2193129203Scognet    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2194129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
2195129203Scognet
2196129203Scognet}
2197129203Scognet
2198129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2199129203Scognet/*
2200129203Scognet-------------------------------------------------------------------------------
2201129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to
2202129203Scognetthe corresponding value `b', and 0 otherwise.  The invalid exception is
2203129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
2204129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2205129203Scognet-------------------------------------------------------------------------------
2206129203Scognet*/
2207129203Scognetflag float32_eq_signaling( float32 a, float32 b )
2208129203Scognet{
2209129203Scognet
2210129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2211129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2212129203Scognet       ) {
2213129203Scognet        float_raise( float_flag_invalid );
2214129203Scognet        return 0;
2215129203Scognet    }
2216129203Scognet    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2217129203Scognet
2218129203Scognet}
2219129203Scognet
2220129203Scognet/*
2221129203Scognet-------------------------------------------------------------------------------
2222129203ScognetReturns 1 if the single-precision floating-point value `a' is less than or
2223129203Scognetequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2224129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
2225129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
2226129203Scognet-------------------------------------------------------------------------------
2227129203Scognet*/
2228129203Scognetflag float32_le_quiet( float32 a, float32 b )
2229129203Scognet{
2230129203Scognet    flag aSign, bSign;
2231129203Scognet
2232129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2233129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2234129203Scognet       ) {
2235129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2236129203Scognet            float_raise( float_flag_invalid );
2237129203Scognet        }
2238129203Scognet        return 0;
2239129203Scognet    }
2240129203Scognet    aSign = extractFloat32Sign( a );
2241129203Scognet    bSign = extractFloat32Sign( b );
2242129203Scognet    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2243129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
2244129203Scognet
2245129203Scognet}
2246129203Scognet
2247129203Scognet/*
2248129203Scognet-------------------------------------------------------------------------------
2249129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
2250129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2251129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
2252129203ScognetStandard for Binary Floating-Point Arithmetic.
2253129203Scognet-------------------------------------------------------------------------------
2254129203Scognet*/
2255129203Scognetflag float32_lt_quiet( float32 a, float32 b )
2256129203Scognet{
2257129203Scognet    flag aSign, bSign;
2258129203Scognet
2259129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2260129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2261129203Scognet       ) {
2262129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2263129203Scognet            float_raise( float_flag_invalid );
2264129203Scognet        }
2265129203Scognet        return 0;
2266129203Scognet    }
2267129203Scognet    aSign = extractFloat32Sign( a );
2268129203Scognet    bSign = extractFloat32Sign( b );
2269129203Scognet    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2270129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
2271129203Scognet
2272129203Scognet}
2273129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2274129203Scognet
2275129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2276129203Scognet/*
2277129203Scognet-------------------------------------------------------------------------------
2278129203ScognetReturns the result of converting the double-precision floating-point value
2279129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
2280129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2281129203ScognetArithmetic---which means in particular that the conversion is rounded
2282129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
2283129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
2284129203Scognetlargest integer with the same sign as `a' is returned.
2285129203Scognet-------------------------------------------------------------------------------
2286129203Scognet*/
2287129203Scognetint32 float64_to_int32( float64 a )
2288129203Scognet{
2289129203Scognet    flag aSign;
2290129203Scognet    int16 aExp, shiftCount;
2291129203Scognet    bits64 aSig;
2292129203Scognet
2293129203Scognet    aSig = extractFloat64Frac( a );
2294129203Scognet    aExp = extractFloat64Exp( a );
2295129203Scognet    aSign = extractFloat64Sign( a );
2296129203Scognet    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2297129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2298129203Scognet    shiftCount = 0x42C - aExp;
2299129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2300129203Scognet    return roundAndPackInt32( aSign, aSig );
2301129203Scognet
2302129203Scognet}
2303129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2304129203Scognet
2305129203Scognet/*
2306129203Scognet-------------------------------------------------------------------------------
2307129203ScognetReturns the result of converting the double-precision floating-point value
2308129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
2309129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2310129203ScognetArithmetic, except that the conversion is always rounded toward zero.
2311129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2312129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
2313129203Scognetreturned.
2314129203Scognet-------------------------------------------------------------------------------
2315129203Scognet*/
2316129203Scognetint32 float64_to_int32_round_to_zero( float64 a )
2317129203Scognet{
2318129203Scognet    flag aSign;
2319129203Scognet    int16 aExp, shiftCount;
2320129203Scognet    bits64 aSig, savedASig;
2321129203Scognet    int32 z;
2322129203Scognet
2323129203Scognet    aSig = extractFloat64Frac( a );
2324129203Scognet    aExp = extractFloat64Exp( a );
2325129203Scognet    aSign = extractFloat64Sign( a );
2326129203Scognet    if ( 0x41E < aExp ) {
2327129203Scognet        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2328129203Scognet        goto invalid;
2329129203Scognet    }
2330129203Scognet    else if ( aExp < 0x3FF ) {
2331129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
2332129203Scognet        return 0;
2333129203Scognet    }
2334129203Scognet    aSig |= LIT64( 0x0010000000000000 );
2335129203Scognet    shiftCount = 0x433 - aExp;
2336129203Scognet    savedASig = aSig;
2337129203Scognet    aSig >>= shiftCount;
2338129203Scognet    z = aSig;
2339129203Scognet    if ( aSign ) z = - z;
2340129203Scognet    if ( ( z < 0 ) ^ aSign ) {
2341129203Scognet invalid:
2342129203Scognet        float_raise( float_flag_invalid );
2343129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2344129203Scognet    }
2345129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
2346129203Scognet        float_exception_flags |= float_flag_inexact;
2347129203Scognet    }
2348129203Scognet    return z;
2349129203Scognet
2350129203Scognet}
2351129203Scognet
2352129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2353129203Scognet/*
2354129203Scognet-------------------------------------------------------------------------------
2355129203ScognetReturns the result of converting the double-precision floating-point value
2356129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
2357129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2358129203ScognetArithmetic---which means in particular that the conversion is rounded
2359129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
2360129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
2361129203Scognetlargest integer with the same sign as `a' is returned.
2362129203Scognet-------------------------------------------------------------------------------
2363129203Scognet*/
2364129203Scognetint64 float64_to_int64( float64 a )
2365129203Scognet{
2366129203Scognet    flag aSign;
2367129203Scognet    int16 aExp, shiftCount;
2368129203Scognet    bits64 aSig, aSigExtra;
2369129203Scognet
2370129203Scognet    aSig = extractFloat64Frac( a );
2371129203Scognet    aExp = extractFloat64Exp( a );
2372129203Scognet    aSign = extractFloat64Sign( a );
2373129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2374129203Scognet    shiftCount = 0x433 - aExp;
2375129203Scognet    if ( shiftCount <= 0 ) {
2376129203Scognet        if ( 0x43E < aExp ) {
2377129203Scognet            float_raise( float_flag_invalid );
2378129203Scognet            if (    ! aSign
2379129203Scognet                 || (    ( aExp == 0x7FF )
2380129203Scognet                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2381129203Scognet               ) {
2382129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
2383129203Scognet            }
2384129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
2385129203Scognet        }
2386129203Scognet        aSigExtra = 0;
2387129203Scognet        aSig <<= - shiftCount;
2388129203Scognet    }
2389129203Scognet    else {
2390129203Scognet        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2391129203Scognet    }
2392129203Scognet    return roundAndPackInt64( aSign, aSig, aSigExtra );
2393129203Scognet
2394129203Scognet}
2395129203Scognet
2396129203Scognet/*
2397129203Scognet-------------------------------------------------------------------------------
2398129203ScognetReturns the result of converting the double-precision floating-point value
2399129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
2400129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2401129203ScognetArithmetic, except that the conversion is always rounded toward zero.
2402129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2403129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
2404129203Scognetreturned.
2405129203Scognet-------------------------------------------------------------------------------
2406129203Scognet*/
2407129203Scognetint64 float64_to_int64_round_to_zero( float64 a )
2408129203Scognet{
2409129203Scognet    flag aSign;
2410129203Scognet    int16 aExp, shiftCount;
2411129203Scognet    bits64 aSig;
2412129203Scognet    int64 z;
2413129203Scognet
2414129203Scognet    aSig = extractFloat64Frac( a );
2415129203Scognet    aExp = extractFloat64Exp( a );
2416129203Scognet    aSign = extractFloat64Sign( a );
2417129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2418129203Scognet    shiftCount = aExp - 0x433;
2419129203Scognet    if ( 0 <= shiftCount ) {
2420129203Scognet        if ( 0x43E <= aExp ) {
2421129203Scognet            if ( a != LIT64( 0xC3E0000000000000 ) ) {
2422129203Scognet                float_raise( float_flag_invalid );
2423129203Scognet                if (    ! aSign
2424129203Scognet                     || (    ( aExp == 0x7FF )
2425129203Scognet                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2426129203Scognet                   ) {
2427129203Scognet                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2428129203Scognet                }
2429129203Scognet            }
2430129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
2431129203Scognet        }
2432129203Scognet        z = aSig<<shiftCount;
2433129203Scognet    }
2434129203Scognet    else {
2435129203Scognet        if ( aExp < 0x3FE ) {
2436129203Scognet            if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
2437129203Scognet            return 0;
2438129203Scognet        }
2439129203Scognet        z = aSig>>( - shiftCount );
2440129203Scognet        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2441129203Scognet            float_exception_flags |= float_flag_inexact;
2442129203Scognet        }
2443129203Scognet    }
2444129203Scognet    if ( aSign ) z = - z;
2445129203Scognet    return z;
2446129203Scognet
2447129203Scognet}
2448129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
2449129203Scognet
2450129203Scognet/*
2451129203Scognet-------------------------------------------------------------------------------
2452129203ScognetReturns the result of converting the double-precision floating-point value
2453129203Scognet`a' to the single-precision floating-point format.  The conversion is
2454129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2455129203ScognetArithmetic.
2456129203Scognet-------------------------------------------------------------------------------
2457129203Scognet*/
2458129203Scognetfloat32 float64_to_float32( float64 a )
2459129203Scognet{
2460129203Scognet    flag aSign;
2461129203Scognet    int16 aExp;
2462129203Scognet    bits64 aSig;
2463129203Scognet    bits32 zSig;
2464129203Scognet
2465129203Scognet    aSig = extractFloat64Frac( a );
2466129203Scognet    aExp = extractFloat64Exp( a );
2467129203Scognet    aSign = extractFloat64Sign( a );
2468129203Scognet    if ( aExp == 0x7FF ) {
2469129203Scognet        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2470129203Scognet        return packFloat32( aSign, 0xFF, 0 );
2471129203Scognet    }
2472129203Scognet    shift64RightJamming( aSig, 22, &aSig );
2473129203Scognet    zSig = aSig;
2474129203Scognet    if ( aExp || zSig ) {
2475129203Scognet        zSig |= 0x40000000;
2476129203Scognet        aExp -= 0x381;
2477129203Scognet    }
2478129203Scognet    return roundAndPackFloat32( aSign, aExp, zSig );
2479129203Scognet
2480129203Scognet}
2481129203Scognet
2482129203Scognet#ifdef FLOATX80
2483129203Scognet
2484129203Scognet/*
2485129203Scognet-------------------------------------------------------------------------------
2486129203ScognetReturns the result of converting the double-precision floating-point value
2487129203Scognet`a' to the extended double-precision floating-point format.  The conversion
2488129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
2489129203ScognetArithmetic.
2490129203Scognet-------------------------------------------------------------------------------
2491129203Scognet*/
2492129203Scognetfloatx80 float64_to_floatx80( float64 a )
2493129203Scognet{
2494129203Scognet    flag aSign;
2495129203Scognet    int16 aExp;
2496129203Scognet    bits64 aSig;
2497129203Scognet
2498129203Scognet    aSig = extractFloat64Frac( a );
2499129203Scognet    aExp = extractFloat64Exp( a );
2500129203Scognet    aSign = extractFloat64Sign( a );
2501129203Scognet    if ( aExp == 0x7FF ) {
2502129203Scognet        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2503129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2504129203Scognet    }
2505129203Scognet    if ( aExp == 0 ) {
2506129203Scognet        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2507129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2508129203Scognet    }
2509129203Scognet    return
2510129203Scognet        packFloatx80(
2511129203Scognet            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2512129203Scognet
2513129203Scognet}
2514129203Scognet
2515129203Scognet#endif
2516129203Scognet
2517129203Scognet#ifdef FLOAT128
2518129203Scognet
2519129203Scognet/*
2520129203Scognet-------------------------------------------------------------------------------
2521129203ScognetReturns the result of converting the double-precision floating-point value
2522129203Scognet`a' to the quadruple-precision floating-point format.  The conversion is
2523129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
2524129203ScognetArithmetic.
2525129203Scognet-------------------------------------------------------------------------------
2526129203Scognet*/
2527129203Scognetfloat128 float64_to_float128( float64 a )
2528129203Scognet{
2529129203Scognet    flag aSign;
2530129203Scognet    int16 aExp;
2531129203Scognet    bits64 aSig, zSig0, zSig1;
2532129203Scognet
2533129203Scognet    aSig = extractFloat64Frac( a );
2534129203Scognet    aExp = extractFloat64Exp( a );
2535129203Scognet    aSign = extractFloat64Sign( a );
2536129203Scognet    if ( aExp == 0x7FF ) {
2537129203Scognet        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
2538129203Scognet        return packFloat128( aSign, 0x7FFF, 0, 0 );
2539129203Scognet    }
2540129203Scognet    if ( aExp == 0 ) {
2541129203Scognet        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2542129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2543129203Scognet        --aExp;
2544129203Scognet    }
2545129203Scognet    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2546129203Scognet    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2547129203Scognet
2548129203Scognet}
2549129203Scognet
2550129203Scognet#endif
2551129203Scognet
2552129203Scognet#ifndef SOFTFLOAT_FOR_GCC
2553129203Scognet/*
2554129203Scognet-------------------------------------------------------------------------------
2555129203ScognetRounds the double-precision floating-point value `a' to an integer, and
2556129203Scognetreturns the result as a double-precision floating-point value.  The
2557129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
2558129203ScognetFloating-Point Arithmetic.
2559129203Scognet-------------------------------------------------------------------------------
2560129203Scognet*/
2561129203Scognetfloat64 float64_round_to_int( float64 a )
2562129203Scognet{
2563129203Scognet    flag aSign;
2564129203Scognet    int16 aExp;
2565129203Scognet    bits64 lastBitMask, roundBitsMask;
2566129203Scognet    int8 roundingMode;
2567129203Scognet    float64 z;
2568129203Scognet
2569129203Scognet    aExp = extractFloat64Exp( a );
2570129203Scognet    if ( 0x433 <= aExp ) {
2571129203Scognet        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2572129203Scognet            return propagateFloat64NaN( a, a );
2573129203Scognet        }
2574129203Scognet        return a;
2575129203Scognet    }
2576129203Scognet    if ( aExp < 0x3FF ) {
2577129203Scognet        if ( (bits64) ( a<<1 ) == 0 ) return a;
2578129203Scognet        float_exception_flags |= float_flag_inexact;
2579129203Scognet        aSign = extractFloat64Sign( a );
2580129203Scognet        switch ( float_rounding_mode ) {
2581129203Scognet         case float_round_nearest_even:
2582129203Scognet            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2583129203Scognet                return packFloat64( aSign, 0x3FF, 0 );
2584129203Scognet            }
2585129203Scognet            break;
2586129203Scognet	 case float_round_to_zero:
2587129203Scognet	    break;
2588129203Scognet         case float_round_down:
2589129203Scognet            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2590129203Scognet         case float_round_up:
2591129203Scognet            return
2592129203Scognet            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2593129203Scognet        }
2594129203Scognet        return packFloat64( aSign, 0, 0 );
2595129203Scognet    }
2596129203Scognet    lastBitMask = 1;
2597129203Scognet    lastBitMask <<= 0x433 - aExp;
2598129203Scognet    roundBitsMask = lastBitMask - 1;
2599129203Scognet    z = a;
2600129203Scognet    roundingMode = float_rounding_mode;
2601129203Scognet    if ( roundingMode == float_round_nearest_even ) {
2602129203Scognet        z += lastBitMask>>1;
2603129203Scognet        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2604129203Scognet    }
2605129203Scognet    else if ( roundingMode != float_round_to_zero ) {
2606129203Scognet        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2607129203Scognet            z += roundBitsMask;
2608129203Scognet        }
2609129203Scognet    }
2610129203Scognet    z &= ~ roundBitsMask;
2611129203Scognet    if ( z != a ) float_exception_flags |= float_flag_inexact;
2612129203Scognet    return z;
2613129203Scognet
2614129203Scognet}
2615129203Scognet#endif
2616129203Scognet
2617129203Scognet/*
2618129203Scognet-------------------------------------------------------------------------------
2619129203ScognetReturns the result of adding the absolute values of the double-precision
2620129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2621129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
2622129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
2623129203ScognetFloating-Point Arithmetic.
2624129203Scognet-------------------------------------------------------------------------------
2625129203Scognet*/
2626129203Scognetstatic float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2627129203Scognet{
2628129203Scognet    int16 aExp, bExp, zExp;
2629129203Scognet    bits64 aSig, bSig, zSig;
2630129203Scognet    int16 expDiff;
2631129203Scognet
2632129203Scognet    aSig = extractFloat64Frac( a );
2633129203Scognet    aExp = extractFloat64Exp( a );
2634129203Scognet    bSig = extractFloat64Frac( b );
2635129203Scognet    bExp = extractFloat64Exp( b );
2636129203Scognet    expDiff = aExp - bExp;
2637129203Scognet    aSig <<= 9;
2638129203Scognet    bSig <<= 9;
2639129203Scognet    if ( 0 < expDiff ) {
2640129203Scognet        if ( aExp == 0x7FF ) {
2641129203Scognet            if ( aSig ) return propagateFloat64NaN( a, b );
2642129203Scognet            return a;
2643129203Scognet        }
2644129203Scognet        if ( bExp == 0 ) {
2645129203Scognet            --expDiff;
2646129203Scognet        }
2647129203Scognet        else {
2648129203Scognet            bSig |= LIT64( 0x2000000000000000 );
2649129203Scognet        }
2650129203Scognet        shift64RightJamming( bSig, expDiff, &bSig );
2651129203Scognet        zExp = aExp;
2652129203Scognet    }
2653129203Scognet    else if ( expDiff < 0 ) {
2654129203Scognet        if ( bExp == 0x7FF ) {
2655129203Scognet            if ( bSig ) return propagateFloat64NaN( a, b );
2656129203Scognet            return packFloat64( zSign, 0x7FF, 0 );
2657129203Scognet        }
2658129203Scognet        if ( aExp == 0 ) {
2659129203Scognet            ++expDiff;
2660129203Scognet        }
2661129203Scognet        else {
2662129203Scognet            aSig |= LIT64( 0x2000000000000000 );
2663129203Scognet        }
2664129203Scognet        shift64RightJamming( aSig, - expDiff, &aSig );
2665129203Scognet        zExp = bExp;
2666129203Scognet    }
2667129203Scognet    else {
2668129203Scognet        if ( aExp == 0x7FF ) {
2669129203Scognet            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2670129203Scognet            return a;
2671129203Scognet        }
2672129203Scognet        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2673129203Scognet        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2674129203Scognet        zExp = aExp;
2675129203Scognet        goto roundAndPack;
2676129203Scognet    }
2677129203Scognet    aSig |= LIT64( 0x2000000000000000 );
2678129203Scognet    zSig = ( aSig + bSig )<<1;
2679129203Scognet    --zExp;
2680129203Scognet    if ( (sbits64) zSig < 0 ) {
2681129203Scognet        zSig = aSig + bSig;
2682129203Scognet        ++zExp;
2683129203Scognet    }
2684129203Scognet roundAndPack:
2685129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig );
2686129203Scognet
2687129203Scognet}
2688129203Scognet
2689129203Scognet/*
2690129203Scognet-------------------------------------------------------------------------------
2691129203ScognetReturns the result of subtracting the absolute values of the double-
2692129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
2693129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
2694129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
2695129203ScognetStandard for Binary Floating-Point Arithmetic.
2696129203Scognet-------------------------------------------------------------------------------
2697129203Scognet*/
2698129203Scognetstatic float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2699129203Scognet{
2700129203Scognet    int16 aExp, bExp, zExp;
2701129203Scognet    bits64 aSig, bSig, zSig;
2702129203Scognet    int16 expDiff;
2703129203Scognet
2704129203Scognet    aSig = extractFloat64Frac( a );
2705129203Scognet    aExp = extractFloat64Exp( a );
2706129203Scognet    bSig = extractFloat64Frac( b );
2707129203Scognet    bExp = extractFloat64Exp( b );
2708129203Scognet    expDiff = aExp - bExp;
2709129203Scognet    aSig <<= 10;
2710129203Scognet    bSig <<= 10;
2711129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
2712129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
2713129203Scognet    if ( aExp == 0x7FF ) {
2714129203Scognet        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2715129203Scognet        float_raise( float_flag_invalid );
2716129203Scognet        return float64_default_nan;
2717129203Scognet    }
2718129203Scognet    if ( aExp == 0 ) {
2719129203Scognet        aExp = 1;
2720129203Scognet        bExp = 1;
2721129203Scognet    }
2722129203Scognet    if ( bSig < aSig ) goto aBigger;
2723129203Scognet    if ( aSig < bSig ) goto bBigger;
2724129203Scognet    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
2725129203Scognet bExpBigger:
2726129203Scognet    if ( bExp == 0x7FF ) {
2727129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
2728129203Scognet        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2729129203Scognet    }
2730129203Scognet    if ( aExp == 0 ) {
2731129203Scognet        ++expDiff;
2732129203Scognet    }
2733129203Scognet    else {
2734129203Scognet        aSig |= LIT64( 0x4000000000000000 );
2735129203Scognet    }
2736129203Scognet    shift64RightJamming( aSig, - expDiff, &aSig );
2737129203Scognet    bSig |= LIT64( 0x4000000000000000 );
2738129203Scognet bBigger:
2739129203Scognet    zSig = bSig - aSig;
2740129203Scognet    zExp = bExp;
2741129203Scognet    zSign ^= 1;
2742129203Scognet    goto normalizeRoundAndPack;
2743129203Scognet aExpBigger:
2744129203Scognet    if ( aExp == 0x7FF ) {
2745129203Scognet        if ( aSig ) return propagateFloat64NaN( a, b );
2746129203Scognet        return a;
2747129203Scognet    }
2748129203Scognet    if ( bExp == 0 ) {
2749129203Scognet        --expDiff;
2750129203Scognet    }
2751129203Scognet    else {
2752129203Scognet        bSig |= LIT64( 0x4000000000000000 );
2753129203Scognet    }
2754129203Scognet    shift64RightJamming( bSig, expDiff, &bSig );
2755129203Scognet    aSig |= LIT64( 0x4000000000000000 );
2756129203Scognet aBigger:
2757129203Scognet    zSig = aSig - bSig;
2758129203Scognet    zExp = aExp;
2759129203Scognet normalizeRoundAndPack:
2760129203Scognet    --zExp;
2761129203Scognet    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2762129203Scognet
2763129203Scognet}
2764129203Scognet
2765129203Scognet/*
2766129203Scognet-------------------------------------------------------------------------------
2767129203ScognetReturns the result of adding the double-precision floating-point values `a'
2768129203Scognetand `b'.  The operation is performed according to the IEC/IEEE Standard for
2769129203ScognetBinary Floating-Point Arithmetic.
2770129203Scognet-------------------------------------------------------------------------------
2771129203Scognet*/
2772129203Scognetfloat64 float64_add( float64 a, float64 b )
2773129203Scognet{
2774129203Scognet    flag aSign, bSign;
2775129203Scognet
2776129203Scognet    aSign = extractFloat64Sign( a );
2777129203Scognet    bSign = extractFloat64Sign( b );
2778129203Scognet    if ( aSign == bSign ) {
2779129203Scognet        return addFloat64Sigs( a, b, aSign );
2780129203Scognet    }
2781129203Scognet    else {
2782129203Scognet        return subFloat64Sigs( a, b, aSign );
2783129203Scognet    }
2784129203Scognet
2785129203Scognet}
2786129203Scognet
2787129203Scognet/*
2788129203Scognet-------------------------------------------------------------------------------
2789129203ScognetReturns the result of subtracting the double-precision floating-point values
2790129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2791129203Scognetfor Binary Floating-Point Arithmetic.
2792129203Scognet-------------------------------------------------------------------------------
2793129203Scognet*/
2794129203Scognetfloat64 float64_sub( float64 a, float64 b )
2795129203Scognet{
2796129203Scognet    flag aSign, bSign;
2797129203Scognet
2798129203Scognet    aSign = extractFloat64Sign( a );
2799129203Scognet    bSign = extractFloat64Sign( b );
2800129203Scognet    if ( aSign == bSign ) {
2801129203Scognet        return subFloat64Sigs( a, b, aSign );
2802129203Scognet    }
2803129203Scognet    else {
2804129203Scognet        return addFloat64Sigs( a, b, aSign );
2805129203Scognet    }
2806129203Scognet
2807129203Scognet}
2808129203Scognet
2809129203Scognet/*
2810129203Scognet-------------------------------------------------------------------------------
2811129203ScognetReturns the result of multiplying the double-precision floating-point values
2812129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2813129203Scognetfor Binary Floating-Point Arithmetic.
2814129203Scognet-------------------------------------------------------------------------------
2815129203Scognet*/
2816129203Scognetfloat64 float64_mul( float64 a, float64 b )
2817129203Scognet{
2818129203Scognet    flag aSign, bSign, zSign;
2819129203Scognet    int16 aExp, bExp, zExp;
2820129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
2821129203Scognet
2822129203Scognet    aSig = extractFloat64Frac( a );
2823129203Scognet    aExp = extractFloat64Exp( a );
2824129203Scognet    aSign = extractFloat64Sign( a );
2825129203Scognet    bSig = extractFloat64Frac( b );
2826129203Scognet    bExp = extractFloat64Exp( b );
2827129203Scognet    bSign = extractFloat64Sign( b );
2828129203Scognet    zSign = aSign ^ bSign;
2829129203Scognet    if ( aExp == 0x7FF ) {
2830129203Scognet        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2831129203Scognet            return propagateFloat64NaN( a, b );
2832129203Scognet        }
2833129203Scognet        if ( ( bExp | bSig ) == 0 ) {
2834129203Scognet            float_raise( float_flag_invalid );
2835129203Scognet            return float64_default_nan;
2836129203Scognet        }
2837129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
2838129203Scognet    }
2839129203Scognet    if ( bExp == 0x7FF ) {
2840129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
2841129203Scognet        if ( ( aExp | aSig ) == 0 ) {
2842129203Scognet            float_raise( float_flag_invalid );
2843129203Scognet            return float64_default_nan;
2844129203Scognet        }
2845129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
2846129203Scognet    }
2847129203Scognet    if ( aExp == 0 ) {
2848129203Scognet        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2849129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2850129203Scognet    }
2851129203Scognet    if ( bExp == 0 ) {
2852129203Scognet        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2853129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2854129203Scognet    }
2855129203Scognet    zExp = aExp + bExp - 0x3FF;
2856129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2857129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2858129203Scognet    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2859129203Scognet    zSig0 |= ( zSig1 != 0 );
2860129203Scognet    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2861129203Scognet        zSig0 <<= 1;
2862129203Scognet        --zExp;
2863129203Scognet    }
2864129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig0 );
2865129203Scognet
2866129203Scognet}
2867129203Scognet
2868129203Scognet/*
2869129203Scognet-------------------------------------------------------------------------------
2870129203ScognetReturns the result of dividing the double-precision floating-point value `a'
2871129203Scognetby the corresponding value `b'.  The operation is performed according to
2872129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2873129203Scognet-------------------------------------------------------------------------------
2874129203Scognet*/
2875129203Scognetfloat64 float64_div( float64 a, float64 b )
2876129203Scognet{
2877129203Scognet    flag aSign, bSign, zSign;
2878129203Scognet    int16 aExp, bExp, zExp;
2879129203Scognet    bits64 aSig, bSig, zSig;
2880129203Scognet    bits64 rem0, rem1;
2881129203Scognet    bits64 term0, term1;
2882129203Scognet
2883129203Scognet    aSig = extractFloat64Frac( a );
2884129203Scognet    aExp = extractFloat64Exp( a );
2885129203Scognet    aSign = extractFloat64Sign( a );
2886129203Scognet    bSig = extractFloat64Frac( b );
2887129203Scognet    bExp = extractFloat64Exp( b );
2888129203Scognet    bSign = extractFloat64Sign( b );
2889129203Scognet    zSign = aSign ^ bSign;
2890129203Scognet    if ( aExp == 0x7FF ) {
2891129203Scognet        if ( aSig ) return propagateFloat64NaN( a, b );
2892129203Scognet        if ( bExp == 0x7FF ) {
2893129203Scognet            if ( bSig ) return propagateFloat64NaN( a, b );
2894129203Scognet            float_raise( float_flag_invalid );
2895129203Scognet            return float64_default_nan;
2896129203Scognet        }
2897129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
2898129203Scognet    }
2899129203Scognet    if ( bExp == 0x7FF ) {
2900129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
2901129203Scognet        return packFloat64( zSign, 0, 0 );
2902129203Scognet    }
2903129203Scognet    if ( bExp == 0 ) {
2904129203Scognet        if ( bSig == 0 ) {
2905129203Scognet            if ( ( aExp | aSig ) == 0 ) {
2906129203Scognet                float_raise( float_flag_invalid );
2907129203Scognet                return float64_default_nan;
2908129203Scognet            }
2909129203Scognet            float_raise( float_flag_divbyzero );
2910129203Scognet            return packFloat64( zSign, 0x7FF, 0 );
2911129203Scognet        }
2912129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2913129203Scognet    }
2914129203Scognet    if ( aExp == 0 ) {
2915129203Scognet        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2916129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2917129203Scognet    }
2918129203Scognet    zExp = aExp - bExp + 0x3FD;
2919129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2920129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2921129203Scognet    if ( bSig <= ( aSig + aSig ) ) {
2922129203Scognet        aSig >>= 1;
2923129203Scognet        ++zExp;
2924129203Scognet    }
2925129203Scognet    zSig = estimateDiv128To64( aSig, 0, bSig );
2926129203Scognet    if ( ( zSig & 0x1FF ) <= 2 ) {
2927129203Scognet        mul64To128( bSig, zSig, &term0, &term1 );
2928129203Scognet        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2929129203Scognet        while ( (sbits64) rem0 < 0 ) {
2930129203Scognet            --zSig;
2931129203Scognet            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2932129203Scognet        }
2933129203Scognet        zSig |= ( rem1 != 0 );
2934129203Scognet    }
2935129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig );
2936129203Scognet
2937129203Scognet}
2938129203Scognet
2939129203Scognet#ifndef SOFTFLOAT_FOR_GCC
2940129203Scognet/*
2941129203Scognet-------------------------------------------------------------------------------
2942129203ScognetReturns the remainder of the double-precision floating-point value `a'
2943129203Scognetwith respect to the corresponding value `b'.  The operation is performed
2944129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2945129203Scognet-------------------------------------------------------------------------------
2946129203Scognet*/
2947129203Scognetfloat64 float64_rem( float64 a, float64 b )
2948129203Scognet{
2949129203Scognet    flag aSign, bSign, zSign;
2950129203Scognet    int16 aExp, bExp, expDiff;
2951129203Scognet    bits64 aSig, bSig;
2952129203Scognet    bits64 q, alternateASig;
2953129203Scognet    sbits64 sigMean;
2954129203Scognet
2955129203Scognet    aSig = extractFloat64Frac( a );
2956129203Scognet    aExp = extractFloat64Exp( a );
2957129203Scognet    aSign = extractFloat64Sign( a );
2958129203Scognet    bSig = extractFloat64Frac( b );
2959129203Scognet    bExp = extractFloat64Exp( b );
2960129203Scognet    bSign = extractFloat64Sign( b );
2961129203Scognet    if ( aExp == 0x7FF ) {
2962129203Scognet        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2963129203Scognet            return propagateFloat64NaN( a, b );
2964129203Scognet        }
2965129203Scognet        float_raise( float_flag_invalid );
2966129203Scognet        return float64_default_nan;
2967129203Scognet    }
2968129203Scognet    if ( bExp == 0x7FF ) {
2969129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
2970129203Scognet        return a;
2971129203Scognet    }
2972129203Scognet    if ( bExp == 0 ) {
2973129203Scognet        if ( bSig == 0 ) {
2974129203Scognet            float_raise( float_flag_invalid );
2975129203Scognet            return float64_default_nan;
2976129203Scognet        }
2977129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2978129203Scognet    }
2979129203Scognet    if ( aExp == 0 ) {
2980129203Scognet        if ( aSig == 0 ) return a;
2981129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2982129203Scognet    }
2983129203Scognet    expDiff = aExp - bExp;
2984129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
2985129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2986129203Scognet    if ( expDiff < 0 ) {
2987129203Scognet        if ( expDiff < -1 ) return a;
2988129203Scognet        aSig >>= 1;
2989129203Scognet    }
2990129203Scognet    q = ( bSig <= aSig );
2991129203Scognet    if ( q ) aSig -= bSig;
2992129203Scognet    expDiff -= 64;
2993129203Scognet    while ( 0 < expDiff ) {
2994129203Scognet        q = estimateDiv128To64( aSig, 0, bSig );
2995129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
2996129203Scognet        aSig = - ( ( bSig>>2 ) * q );
2997129203Scognet        expDiff -= 62;
2998129203Scognet    }
2999129203Scognet    expDiff += 64;
3000129203Scognet    if ( 0 < expDiff ) {
3001129203Scognet        q = estimateDiv128To64( aSig, 0, bSig );
3002129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
3003129203Scognet        q >>= 64 - expDiff;
3004129203Scognet        bSig >>= 2;
3005129203Scognet        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3006129203Scognet    }
3007129203Scognet    else {
3008129203Scognet        aSig >>= 2;
3009129203Scognet        bSig >>= 2;
3010129203Scognet    }
3011129203Scognet    do {
3012129203Scognet        alternateASig = aSig;
3013129203Scognet        ++q;
3014129203Scognet        aSig -= bSig;
3015129203Scognet    } while ( 0 <= (sbits64) aSig );
3016129203Scognet    sigMean = aSig + alternateASig;
3017129203Scognet    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3018129203Scognet        aSig = alternateASig;
3019129203Scognet    }
3020129203Scognet    zSign = ( (sbits64) aSig < 0 );
3021129203Scognet    if ( zSign ) aSig = - aSig;
3022129203Scognet    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3023129203Scognet
3024129203Scognet}
3025129203Scognet
3026129203Scognet/*
3027129203Scognet-------------------------------------------------------------------------------
3028129203ScognetReturns the square root of the double-precision floating-point value `a'.
3029129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
3030129203ScognetFloating-Point Arithmetic.
3031129203Scognet-------------------------------------------------------------------------------
3032129203Scognet*/
3033129203Scognetfloat64 float64_sqrt( float64 a )
3034129203Scognet{
3035129203Scognet    flag aSign;
3036129203Scognet    int16 aExp, zExp;
3037129203Scognet    bits64 aSig, zSig, doubleZSig;
3038129203Scognet    bits64 rem0, rem1, term0, term1;
3039129203Scognet
3040129203Scognet    aSig = extractFloat64Frac( a );
3041129203Scognet    aExp = extractFloat64Exp( a );
3042129203Scognet    aSign = extractFloat64Sign( a );
3043129203Scognet    if ( aExp == 0x7FF ) {
3044129203Scognet        if ( aSig ) return propagateFloat64NaN( a, a );
3045129203Scognet        if ( ! aSign ) return a;
3046129203Scognet        float_raise( float_flag_invalid );
3047129203Scognet        return float64_default_nan;
3048129203Scognet    }
3049129203Scognet    if ( aSign ) {
3050129203Scognet        if ( ( aExp | aSig ) == 0 ) return a;
3051129203Scognet        float_raise( float_flag_invalid );
3052129203Scognet        return float64_default_nan;
3053129203Scognet    }
3054129203Scognet    if ( aExp == 0 ) {
3055129203Scognet        if ( aSig == 0 ) return 0;
3056129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3057129203Scognet    }
3058129203Scognet    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3059129203Scognet    aSig |= LIT64( 0x0010000000000000 );
3060129203Scognet    zSig = estimateSqrt32( aExp, aSig>>21 );
3061129203Scognet    aSig <<= 9 - ( aExp & 1 );
3062129203Scognet    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3063129203Scognet    if ( ( zSig & 0x1FF ) <= 5 ) {
3064129203Scognet        doubleZSig = zSig<<1;
3065129203Scognet        mul64To128( zSig, zSig, &term0, &term1 );
3066129203Scognet        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3067129203Scognet        while ( (sbits64) rem0 < 0 ) {
3068129203Scognet            --zSig;
3069129203Scognet            doubleZSig -= 2;
3070129203Scognet            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3071129203Scognet        }
3072129203Scognet        zSig |= ( ( rem0 | rem1 ) != 0 );
3073129203Scognet    }
3074129203Scognet    return roundAndPackFloat64( 0, zExp, zSig );
3075129203Scognet
3076129203Scognet}
3077129203Scognet#endif
3078129203Scognet
3079129203Scognet/*
3080129203Scognet-------------------------------------------------------------------------------
3081129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the
3082129203Scognetcorresponding value `b', and 0 otherwise.  The comparison is performed
3083129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3084129203Scognet-------------------------------------------------------------------------------
3085129203Scognet*/
3086129203Scognetflag float64_eq( float64 a, float64 b )
3087129203Scognet{
3088129203Scognet
3089129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3090129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3091129203Scognet       ) {
3092129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3093129203Scognet            float_raise( float_flag_invalid );
3094129203Scognet        }
3095129203Scognet        return 0;
3096129203Scognet    }
3097129203Scognet    return ( a == b ) ||
3098129203Scognet	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3099129203Scognet
3100129203Scognet}
3101129203Scognet
3102129203Scognet/*
3103129203Scognet-------------------------------------------------------------------------------
3104129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or
3105129203Scognetequal to the corresponding value `b', and 0 otherwise.  The comparison is
3106129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
3107129203ScognetArithmetic.
3108129203Scognet-------------------------------------------------------------------------------
3109129203Scognet*/
3110129203Scognetflag float64_le( float64 a, float64 b )
3111129203Scognet{
3112129203Scognet    flag aSign, bSign;
3113129203Scognet
3114129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3115129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3116129203Scognet       ) {
3117129203Scognet        float_raise( float_flag_invalid );
3118129203Scognet        return 0;
3119129203Scognet    }
3120129203Scognet    aSign = extractFloat64Sign( a );
3121129203Scognet    bSign = extractFloat64Sign( b );
3122129203Scognet    if ( aSign != bSign )
3123129203Scognet	return aSign ||
3124129203Scognet	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3125129203Scognet	      0 );
3126129203Scognet    return ( a == b ) ||
3127129203Scognet	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3128129203Scognet
3129129203Scognet}
3130129203Scognet
3131129203Scognet/*
3132129203Scognet-------------------------------------------------------------------------------
3133129203ScognetReturns 1 if the double-precision floating-point value `a' is less than
3134129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
3135129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3136129203Scognet-------------------------------------------------------------------------------
3137129203Scognet*/
3138129203Scognetflag float64_lt( float64 a, float64 b )
3139129203Scognet{
3140129203Scognet    flag aSign, bSign;
3141129203Scognet
3142129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3143129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3144129203Scognet       ) {
3145129203Scognet        float_raise( float_flag_invalid );
3146129203Scognet        return 0;
3147129203Scognet    }
3148129203Scognet    aSign = extractFloat64Sign( a );
3149129203Scognet    bSign = extractFloat64Sign( b );
3150129203Scognet    if ( aSign != bSign )
3151129203Scognet	return aSign &&
3152129203Scognet	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3153129203Scognet	      0 );
3154129203Scognet    return ( a != b ) &&
3155129203Scognet	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3156129203Scognet
3157129203Scognet}
3158129203Scognet
3159129203Scognet#ifndef SOFTFLOAT_FOR_GCC
3160129203Scognet/*
3161129203Scognet-------------------------------------------------------------------------------
3162129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the
3163129203Scognetcorresponding value `b', and 0 otherwise.  The invalid exception is raised
3164129203Scognetif either operand is a NaN.  Otherwise, the comparison is performed
3165129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3166129203Scognet-------------------------------------------------------------------------------
3167129203Scognet*/
3168129203Scognetflag float64_eq_signaling( float64 a, float64 b )
3169129203Scognet{
3170129203Scognet
3171129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3172129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3173129203Scognet       ) {
3174129203Scognet        float_raise( float_flag_invalid );
3175129203Scognet        return 0;
3176129203Scognet    }
3177129203Scognet    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3178129203Scognet
3179129203Scognet}
3180129203Scognet
3181129203Scognet/*
3182129203Scognet-------------------------------------------------------------------------------
3183129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or
3184129203Scognetequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3185129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
3186129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
3187129203Scognet-------------------------------------------------------------------------------
3188129203Scognet*/
3189129203Scognetflag float64_le_quiet( float64 a, float64 b )
3190129203Scognet{
3191129203Scognet    flag aSign, bSign;
3192129203Scognet
3193129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3194129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3195129203Scognet       ) {
3196129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3197129203Scognet            float_raise( float_flag_invalid );
3198129203Scognet        }
3199129203Scognet        return 0;
3200129203Scognet    }
3201129203Scognet    aSign = extractFloat64Sign( a );
3202129203Scognet    bSign = extractFloat64Sign( b );
3203129203Scognet    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3204129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
3205129203Scognet
3206129203Scognet}
3207129203Scognet
3208129203Scognet/*
3209129203Scognet-------------------------------------------------------------------------------
3210129203ScognetReturns 1 if the double-precision floating-point value `a' is less than
3211129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3212129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
3213129203ScognetStandard for Binary Floating-Point Arithmetic.
3214129203Scognet-------------------------------------------------------------------------------
3215129203Scognet*/
3216129203Scognetflag float64_lt_quiet( float64 a, float64 b )
3217129203Scognet{
3218129203Scognet    flag aSign, bSign;
3219129203Scognet
3220129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3221129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3222129203Scognet       ) {
3223129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3224129203Scognet            float_raise( float_flag_invalid );
3225129203Scognet        }
3226129203Scognet        return 0;
3227129203Scognet    }
3228129203Scognet    aSign = extractFloat64Sign( a );
3229129203Scognet    bSign = extractFloat64Sign( b );
3230129203Scognet    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3231129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
3232129203Scognet
3233129203Scognet}
3234129203Scognet#endif
3235129203Scognet
3236129203Scognet#ifdef FLOATX80
3237129203Scognet
3238129203Scognet/*
3239129203Scognet-------------------------------------------------------------------------------
3240129203ScognetReturns the result of converting the extended double-precision floating-
3241129203Scognetpoint value `a' to the 32-bit two's complement integer format.  The
3242129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3243129203ScognetFloating-Point Arithmetic---which means in particular that the conversion
3244129203Scognetis rounded according to the current rounding mode.  If `a' is a NaN, the
3245129203Scognetlargest positive integer is returned.  Otherwise, if the conversion
3246129203Scognetoverflows, the largest integer with the same sign as `a' is returned.
3247129203Scognet-------------------------------------------------------------------------------
3248129203Scognet*/
3249129203Scognetint32 floatx80_to_int32( floatx80 a )
3250129203Scognet{
3251129203Scognet    flag aSign;
3252129203Scognet    int32 aExp, shiftCount;
3253129203Scognet    bits64 aSig;
3254129203Scognet
3255129203Scognet    aSig = extractFloatx80Frac( a );
3256129203Scognet    aExp = extractFloatx80Exp( a );
3257129203Scognet    aSign = extractFloatx80Sign( a );
3258129203Scognet    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3259129203Scognet    shiftCount = 0x4037 - aExp;
3260129203Scognet    if ( shiftCount <= 0 ) shiftCount = 1;
3261129203Scognet    shift64RightJamming( aSig, shiftCount, &aSig );
3262129203Scognet    return roundAndPackInt32( aSign, aSig );
3263129203Scognet
3264129203Scognet}
3265129203Scognet
3266129203Scognet/*
3267129203Scognet-------------------------------------------------------------------------------
3268129203ScognetReturns the result of converting the extended double-precision floating-
3269129203Scognetpoint value `a' to the 32-bit two's complement integer format.  The
3270129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3271129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded
3272129203Scognettoward zero.  If `a' is a NaN, the largest positive integer is returned.
3273129203ScognetOtherwise, if the conversion overflows, the largest integer with the same
3274129203Scognetsign as `a' is returned.
3275129203Scognet-------------------------------------------------------------------------------
3276129203Scognet*/
3277129203Scognetint32 floatx80_to_int32_round_to_zero( floatx80 a )
3278129203Scognet{
3279129203Scognet    flag aSign;
3280129203Scognet    int32 aExp, shiftCount;
3281129203Scognet    bits64 aSig, savedASig;
3282129203Scognet    int32 z;
3283129203Scognet
3284129203Scognet    aSig = extractFloatx80Frac( a );
3285129203Scognet    aExp = extractFloatx80Exp( a );
3286129203Scognet    aSign = extractFloatx80Sign( a );
3287129203Scognet    if ( 0x401E < aExp ) {
3288129203Scognet        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3289129203Scognet        goto invalid;
3290129203Scognet    }
3291129203Scognet    else if ( aExp < 0x3FFF ) {
3292129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
3293129203Scognet        return 0;
3294129203Scognet    }
3295129203Scognet    shiftCount = 0x403E - aExp;
3296129203Scognet    savedASig = aSig;
3297129203Scognet    aSig >>= shiftCount;
3298129203Scognet    z = aSig;
3299129203Scognet    if ( aSign ) z = - z;
3300129203Scognet    if ( ( z < 0 ) ^ aSign ) {
3301129203Scognet invalid:
3302129203Scognet        float_raise( float_flag_invalid );
3303129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3304129203Scognet    }
3305129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
3306129203Scognet        float_exception_flags |= float_flag_inexact;
3307129203Scognet    }
3308129203Scognet    return z;
3309129203Scognet
3310129203Scognet}
3311129203Scognet
3312129203Scognet/*
3313129203Scognet-------------------------------------------------------------------------------
3314129203ScognetReturns the result of converting the extended double-precision floating-
3315129203Scognetpoint value `a' to the 64-bit two's complement integer format.  The
3316129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3317129203ScognetFloating-Point Arithmetic---which means in particular that the conversion
3318129203Scognetis rounded according to the current rounding mode.  If `a' is a NaN,
3319129203Scognetthe largest positive integer is returned.  Otherwise, if the conversion
3320129203Scognetoverflows, the largest integer with the same sign as `a' is returned.
3321129203Scognet-------------------------------------------------------------------------------
3322129203Scognet*/
3323129203Scognetint64 floatx80_to_int64( floatx80 a )
3324129203Scognet{
3325129203Scognet    flag aSign;
3326129203Scognet    int32 aExp, shiftCount;
3327129203Scognet    bits64 aSig, aSigExtra;
3328129203Scognet
3329129203Scognet    aSig = extractFloatx80Frac( a );
3330129203Scognet    aExp = extractFloatx80Exp( a );
3331129203Scognet    aSign = extractFloatx80Sign( a );
3332129203Scognet    shiftCount = 0x403E - aExp;
3333129203Scognet    if ( shiftCount <= 0 ) {
3334129203Scognet        if ( shiftCount ) {
3335129203Scognet            float_raise( float_flag_invalid );
3336129203Scognet            if (    ! aSign
3337129203Scognet                 || (    ( aExp == 0x7FFF )
3338129203Scognet                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3339129203Scognet               ) {
3340129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
3341129203Scognet            }
3342129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
3343129203Scognet        }
3344129203Scognet        aSigExtra = 0;
3345129203Scognet    }
3346129203Scognet    else {
3347129203Scognet        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3348129203Scognet    }
3349129203Scognet    return roundAndPackInt64( aSign, aSig, aSigExtra );
3350129203Scognet
3351129203Scognet}
3352129203Scognet
3353129203Scognet/*
3354129203Scognet-------------------------------------------------------------------------------
3355129203ScognetReturns the result of converting the extended double-precision floating-
3356129203Scognetpoint value `a' to the 64-bit two's complement integer format.  The
3357129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3358129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded
3359129203Scognettoward zero.  If `a' is a NaN, the largest positive integer is returned.
3360129203ScognetOtherwise, if the conversion overflows, the largest integer with the same
3361129203Scognetsign as `a' is returned.
3362129203Scognet-------------------------------------------------------------------------------
3363129203Scognet*/
3364129203Scognetint64 floatx80_to_int64_round_to_zero( floatx80 a )
3365129203Scognet{
3366129203Scognet    flag aSign;
3367129203Scognet    int32 aExp, shiftCount;
3368129203Scognet    bits64 aSig;
3369129203Scognet    int64 z;
3370129203Scognet
3371129203Scognet    aSig = extractFloatx80Frac( a );
3372129203Scognet    aExp = extractFloatx80Exp( a );
3373129203Scognet    aSign = extractFloatx80Sign( a );
3374129203Scognet    shiftCount = aExp - 0x403E;
3375129203Scognet    if ( 0 <= shiftCount ) {
3376129203Scognet        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3377129203Scognet        if ( ( a.high != 0xC03E ) || aSig ) {
3378129203Scognet            float_raise( float_flag_invalid );
3379129203Scognet            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3380129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
3381129203Scognet            }
3382129203Scognet        }
3383129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
3384129203Scognet    }
3385129203Scognet    else if ( aExp < 0x3FFF ) {
3386129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
3387129203Scognet        return 0;
3388129203Scognet    }
3389129203Scognet    z = aSig>>( - shiftCount );
3390129203Scognet    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3391129203Scognet        float_exception_flags |= float_flag_inexact;
3392129203Scognet    }
3393129203Scognet    if ( aSign ) z = - z;
3394129203Scognet    return z;
3395129203Scognet
3396129203Scognet}
3397129203Scognet
3398129203Scognet/*
3399129203Scognet-------------------------------------------------------------------------------
3400129203ScognetReturns the result of converting the extended double-precision floating-
3401129203Scognetpoint value `a' to the single-precision floating-point format.  The
3402129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3403129203ScognetFloating-Point Arithmetic.
3404129203Scognet-------------------------------------------------------------------------------
3405129203Scognet*/
3406129203Scognetfloat32 floatx80_to_float32( floatx80 a )
3407129203Scognet{
3408129203Scognet    flag aSign;
3409129203Scognet    int32 aExp;
3410129203Scognet    bits64 aSig;
3411129203Scognet
3412129203Scognet    aSig = extractFloatx80Frac( a );
3413129203Scognet    aExp = extractFloatx80Exp( a );
3414129203Scognet    aSign = extractFloatx80Sign( a );
3415129203Scognet    if ( aExp == 0x7FFF ) {
3416129203Scognet        if ( (bits64) ( aSig<<1 ) ) {
3417129203Scognet            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3418129203Scognet        }
3419129203Scognet        return packFloat32( aSign, 0xFF, 0 );
3420129203Scognet    }
3421129203Scognet    shift64RightJamming( aSig, 33, &aSig );
3422129203Scognet    if ( aExp || aSig ) aExp -= 0x3F81;
3423129203Scognet    return roundAndPackFloat32( aSign, aExp, aSig );
3424129203Scognet
3425129203Scognet}
3426129203Scognet
3427129203Scognet/*
3428129203Scognet-------------------------------------------------------------------------------
3429129203ScognetReturns the result of converting the extended double-precision floating-
3430129203Scognetpoint value `a' to the double-precision floating-point format.  The
3431129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3432129203ScognetFloating-Point Arithmetic.
3433129203Scognet-------------------------------------------------------------------------------
3434129203Scognet*/
3435129203Scognetfloat64 floatx80_to_float64( floatx80 a )
3436129203Scognet{
3437129203Scognet    flag aSign;
3438129203Scognet    int32 aExp;
3439129203Scognet    bits64 aSig, zSig;
3440129203Scognet
3441129203Scognet    aSig = extractFloatx80Frac( a );
3442129203Scognet    aExp = extractFloatx80Exp( a );
3443129203Scognet    aSign = extractFloatx80Sign( a );
3444129203Scognet    if ( aExp == 0x7FFF ) {
3445129203Scognet        if ( (bits64) ( aSig<<1 ) ) {
3446129203Scognet            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3447129203Scognet        }
3448129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
3449129203Scognet    }
3450129203Scognet    shift64RightJamming( aSig, 1, &zSig );
3451129203Scognet    if ( aExp || aSig ) aExp -= 0x3C01;
3452129203Scognet    return roundAndPackFloat64( aSign, aExp, zSig );
3453129203Scognet
3454129203Scognet}
3455129203Scognet
3456129203Scognet#ifdef FLOAT128
3457129203Scognet
3458129203Scognet/*
3459129203Scognet-------------------------------------------------------------------------------
3460129203ScognetReturns the result of converting the extended double-precision floating-
3461129203Scognetpoint value `a' to the quadruple-precision floating-point format.  The
3462129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
3463129203ScognetFloating-Point Arithmetic.
3464129203Scognet-------------------------------------------------------------------------------
3465129203Scognet*/
3466129203Scognetfloat128 floatx80_to_float128( floatx80 a )
3467129203Scognet{
3468129203Scognet    flag aSign;
3469129203Scognet    int16 aExp;
3470129203Scognet    bits64 aSig, zSig0, zSig1;
3471129203Scognet
3472129203Scognet    aSig = extractFloatx80Frac( a );
3473129203Scognet    aExp = extractFloatx80Exp( a );
3474129203Scognet    aSign = extractFloatx80Sign( a );
3475129203Scognet    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3476129203Scognet        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3477129203Scognet    }
3478129203Scognet    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3479129203Scognet    return packFloat128( aSign, aExp, zSig0, zSig1 );
3480129203Scognet
3481129203Scognet}
3482129203Scognet
3483129203Scognet#endif
3484129203Scognet
3485129203Scognet/*
3486129203Scognet-------------------------------------------------------------------------------
3487129203ScognetRounds the extended double-precision floating-point value `a' to an integer,
3488129203Scognetand returns the result as an extended quadruple-precision floating-point
3489129203Scognetvalue.  The operation is performed according to the IEC/IEEE Standard for
3490129203ScognetBinary Floating-Point Arithmetic.
3491129203Scognet-------------------------------------------------------------------------------
3492129203Scognet*/
3493129203Scognetfloatx80 floatx80_round_to_int( floatx80 a )
3494129203Scognet{
3495129203Scognet    flag aSign;
3496129203Scognet    int32 aExp;
3497129203Scognet    bits64 lastBitMask, roundBitsMask;
3498129203Scognet    int8 roundingMode;
3499129203Scognet    floatx80 z;
3500129203Scognet
3501129203Scognet    aExp = extractFloatx80Exp( a );
3502129203Scognet    if ( 0x403E <= aExp ) {
3503129203Scognet        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3504129203Scognet            return propagateFloatx80NaN( a, a );
3505129203Scognet        }
3506129203Scognet        return a;
3507129203Scognet    }
3508129203Scognet    if ( aExp < 0x3FFF ) {
3509129203Scognet        if (    ( aExp == 0 )
3510129203Scognet             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3511129203Scognet            return a;
3512129203Scognet        }
3513129203Scognet        float_exception_flags |= float_flag_inexact;
3514129203Scognet        aSign = extractFloatx80Sign( a );
3515129203Scognet        switch ( float_rounding_mode ) {
3516129203Scognet         case float_round_nearest_even:
3517129203Scognet            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3518129203Scognet               ) {
3519129203Scognet                return
3520129203Scognet                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3521129203Scognet            }
3522129203Scognet            break;
3523129203Scognet	 case float_round_to_zero:
3524129203Scognet	    break;
3525129203Scognet         case float_round_down:
3526129203Scognet            return
3527129203Scognet                  aSign ?
3528129203Scognet                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3529129203Scognet                : packFloatx80( 0, 0, 0 );
3530129203Scognet         case float_round_up:
3531129203Scognet            return
3532129203Scognet                  aSign ? packFloatx80( 1, 0, 0 )
3533129203Scognet                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3534129203Scognet        }
3535129203Scognet        return packFloatx80( aSign, 0, 0 );
3536129203Scognet    }
3537129203Scognet    lastBitMask = 1;
3538129203Scognet    lastBitMask <<= 0x403E - aExp;
3539129203Scognet    roundBitsMask = lastBitMask - 1;
3540129203Scognet    z = a;
3541129203Scognet    roundingMode = float_rounding_mode;
3542129203Scognet    if ( roundingMode == float_round_nearest_even ) {
3543129203Scognet        z.low += lastBitMask>>1;
3544129203Scognet        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3545129203Scognet    }
3546129203Scognet    else if ( roundingMode != float_round_to_zero ) {
3547129203Scognet        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3548129203Scognet            z.low += roundBitsMask;
3549129203Scognet        }
3550129203Scognet    }
3551129203Scognet    z.low &= ~ roundBitsMask;
3552129203Scognet    if ( z.low == 0 ) {
3553129203Scognet        ++z.high;
3554129203Scognet        z.low = LIT64( 0x8000000000000000 );
3555129203Scognet    }
3556129203Scognet    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
3557129203Scognet    return z;
3558129203Scognet
3559129203Scognet}
3560129203Scognet
3561129203Scognet/*
3562129203Scognet-------------------------------------------------------------------------------
3563129203ScognetReturns the result of adding the absolute values of the extended double-
3564129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3565129203Scognetnegated before being returned.  `zSign' is ignored if the result is a NaN.
3566129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
3567129203ScognetFloating-Point Arithmetic.
3568129203Scognet-------------------------------------------------------------------------------
3569129203Scognet*/
3570129203Scognetstatic floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3571129203Scognet{
3572129203Scognet    int32 aExp, bExp, zExp;
3573129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3574129203Scognet    int32 expDiff;
3575129203Scognet
3576129203Scognet    aSig = extractFloatx80Frac( a );
3577129203Scognet    aExp = extractFloatx80Exp( a );
3578129203Scognet    bSig = extractFloatx80Frac( b );
3579129203Scognet    bExp = extractFloatx80Exp( b );
3580129203Scognet    expDiff = aExp - bExp;
3581129203Scognet    if ( 0 < expDiff ) {
3582129203Scognet        if ( aExp == 0x7FFF ) {
3583129203Scognet            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3584129203Scognet            return a;
3585129203Scognet        }
3586129203Scognet        if ( bExp == 0 ) --expDiff;
3587129203Scognet        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3588129203Scognet        zExp = aExp;
3589129203Scognet    }
3590129203Scognet    else if ( expDiff < 0 ) {
3591129203Scognet        if ( bExp == 0x7FFF ) {
3592129203Scognet            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3593129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3594129203Scognet        }
3595129203Scognet        if ( aExp == 0 ) ++expDiff;
3596129203Scognet        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3597129203Scognet        zExp = bExp;
3598129203Scognet    }
3599129203Scognet    else {
3600129203Scognet        if ( aExp == 0x7FFF ) {
3601129203Scognet            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3602129203Scognet                return propagateFloatx80NaN( a, b );
3603129203Scognet            }
3604129203Scognet            return a;
3605129203Scognet        }
3606129203Scognet        zSig1 = 0;
3607129203Scognet        zSig0 = aSig + bSig;
3608129203Scognet        if ( aExp == 0 ) {
3609129203Scognet            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3610129203Scognet            goto roundAndPack;
3611129203Scognet        }
3612129203Scognet        zExp = aExp;
3613129203Scognet        goto shiftRight1;
3614129203Scognet    }
3615129203Scognet    zSig0 = aSig + bSig;
3616129203Scognet    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3617129203Scognet shiftRight1:
3618129203Scognet    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3619129203Scognet    zSig0 |= LIT64( 0x8000000000000000 );
3620129203Scognet    ++zExp;
3621129203Scognet roundAndPack:
3622129203Scognet    return
3623129203Scognet        roundAndPackFloatx80(
3624129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3625129203Scognet
3626129203Scognet}
3627129203Scognet
3628129203Scognet/*
3629129203Scognet-------------------------------------------------------------------------------
3630129203ScognetReturns the result of subtracting the absolute values of the extended
3631129203Scognetdouble-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3632129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
3633129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
3634129203ScognetStandard for Binary Floating-Point Arithmetic.
3635129203Scognet-------------------------------------------------------------------------------
3636129203Scognet*/
3637129203Scognetstatic floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3638129203Scognet{
3639129203Scognet    int32 aExp, bExp, zExp;
3640129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3641129203Scognet    int32 expDiff;
3642129203Scognet    floatx80 z;
3643129203Scognet
3644129203Scognet    aSig = extractFloatx80Frac( a );
3645129203Scognet    aExp = extractFloatx80Exp( a );
3646129203Scognet    bSig = extractFloatx80Frac( b );
3647129203Scognet    bExp = extractFloatx80Exp( b );
3648129203Scognet    expDiff = aExp - bExp;
3649129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
3650129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
3651129203Scognet    if ( aExp == 0x7FFF ) {
3652129203Scognet        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3653129203Scognet            return propagateFloatx80NaN( a, b );
3654129203Scognet        }
3655129203Scognet        float_raise( float_flag_invalid );
3656129203Scognet        z.low = floatx80_default_nan_low;
3657129203Scognet        z.high = floatx80_default_nan_high;
3658129203Scognet        return z;
3659129203Scognet    }
3660129203Scognet    if ( aExp == 0 ) {
3661129203Scognet        aExp = 1;
3662129203Scognet        bExp = 1;
3663129203Scognet    }
3664129203Scognet    zSig1 = 0;
3665129203Scognet    if ( bSig < aSig ) goto aBigger;
3666129203Scognet    if ( aSig < bSig ) goto bBigger;
3667129203Scognet    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
3668129203Scognet bExpBigger:
3669129203Scognet    if ( bExp == 0x7FFF ) {
3670129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3671129203Scognet        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3672129203Scognet    }
3673129203Scognet    if ( aExp == 0 ) ++expDiff;
3674129203Scognet    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3675129203Scognet bBigger:
3676129203Scognet    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3677129203Scognet    zExp = bExp;
3678129203Scognet    zSign ^= 1;
3679129203Scognet    goto normalizeRoundAndPack;
3680129203Scognet aExpBigger:
3681129203Scognet    if ( aExp == 0x7FFF ) {
3682129203Scognet        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3683129203Scognet        return a;
3684129203Scognet    }
3685129203Scognet    if ( bExp == 0 ) --expDiff;
3686129203Scognet    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3687129203Scognet aBigger:
3688129203Scognet    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3689129203Scognet    zExp = aExp;
3690129203Scognet normalizeRoundAndPack:
3691129203Scognet    return
3692129203Scognet        normalizeRoundAndPackFloatx80(
3693129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3694129203Scognet
3695129203Scognet}
3696129203Scognet
3697129203Scognet/*
3698129203Scognet-------------------------------------------------------------------------------
3699129203ScognetReturns the result of adding the extended double-precision floating-point
3700129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
3701129203ScognetStandard for Binary Floating-Point Arithmetic.
3702129203Scognet-------------------------------------------------------------------------------
3703129203Scognet*/
3704129203Scognetfloatx80 floatx80_add( floatx80 a, floatx80 b )
3705129203Scognet{
3706129203Scognet    flag aSign, bSign;
3707129203Scognet
3708129203Scognet    aSign = extractFloatx80Sign( a );
3709129203Scognet    bSign = extractFloatx80Sign( b );
3710129203Scognet    if ( aSign == bSign ) {
3711129203Scognet        return addFloatx80Sigs( a, b, aSign );
3712129203Scognet    }
3713129203Scognet    else {
3714129203Scognet        return subFloatx80Sigs( a, b, aSign );
3715129203Scognet    }
3716129203Scognet
3717129203Scognet}
3718129203Scognet
3719129203Scognet/*
3720129203Scognet-------------------------------------------------------------------------------
3721129203ScognetReturns the result of subtracting the extended double-precision floating-
3722129203Scognetpoint values `a' and `b'.  The operation is performed according to the
3723129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
3724129203Scognet-------------------------------------------------------------------------------
3725129203Scognet*/
3726129203Scognetfloatx80 floatx80_sub( floatx80 a, floatx80 b )
3727129203Scognet{
3728129203Scognet    flag aSign, bSign;
3729129203Scognet
3730129203Scognet    aSign = extractFloatx80Sign( a );
3731129203Scognet    bSign = extractFloatx80Sign( b );
3732129203Scognet    if ( aSign == bSign ) {
3733129203Scognet        return subFloatx80Sigs( a, b, aSign );
3734129203Scognet    }
3735129203Scognet    else {
3736129203Scognet        return addFloatx80Sigs( a, b, aSign );
3737129203Scognet    }
3738129203Scognet
3739129203Scognet}
3740129203Scognet
3741129203Scognet/*
3742129203Scognet-------------------------------------------------------------------------------
3743129203ScognetReturns the result of multiplying the extended double-precision floating-
3744129203Scognetpoint values `a' and `b'.  The operation is performed according to the
3745129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
3746129203Scognet-------------------------------------------------------------------------------
3747129203Scognet*/
3748129203Scognetfloatx80 floatx80_mul( floatx80 a, floatx80 b )
3749129203Scognet{
3750129203Scognet    flag aSign, bSign, zSign;
3751129203Scognet    int32 aExp, bExp, zExp;
3752129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3753129203Scognet    floatx80 z;
3754129203Scognet
3755129203Scognet    aSig = extractFloatx80Frac( a );
3756129203Scognet    aExp = extractFloatx80Exp( a );
3757129203Scognet    aSign = extractFloatx80Sign( a );
3758129203Scognet    bSig = extractFloatx80Frac( b );
3759129203Scognet    bExp = extractFloatx80Exp( b );
3760129203Scognet    bSign = extractFloatx80Sign( b );
3761129203Scognet    zSign = aSign ^ bSign;
3762129203Scognet    if ( aExp == 0x7FFF ) {
3763129203Scognet        if (    (bits64) ( aSig<<1 )
3764129203Scognet             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3765129203Scognet            return propagateFloatx80NaN( a, b );
3766129203Scognet        }
3767129203Scognet        if ( ( bExp | bSig ) == 0 ) goto invalid;
3768129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3769129203Scognet    }
3770129203Scognet    if ( bExp == 0x7FFF ) {
3771129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3772129203Scognet        if ( ( aExp | aSig ) == 0 ) {
3773129203Scognet invalid:
3774129203Scognet            float_raise( float_flag_invalid );
3775129203Scognet            z.low = floatx80_default_nan_low;
3776129203Scognet            z.high = floatx80_default_nan_high;
3777129203Scognet            return z;
3778129203Scognet        }
3779129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3780129203Scognet    }
3781129203Scognet    if ( aExp == 0 ) {
3782129203Scognet        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3783129203Scognet        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3784129203Scognet    }
3785129203Scognet    if ( bExp == 0 ) {
3786129203Scognet        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3787129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3788129203Scognet    }
3789129203Scognet    zExp = aExp + bExp - 0x3FFE;
3790129203Scognet    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3791129203Scognet    if ( 0 < (sbits64) zSig0 ) {
3792129203Scognet        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3793129203Scognet        --zExp;
3794129203Scognet    }
3795129203Scognet    return
3796129203Scognet        roundAndPackFloatx80(
3797129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3798129203Scognet
3799129203Scognet}
3800129203Scognet
3801129203Scognet/*
3802129203Scognet-------------------------------------------------------------------------------
3803129203ScognetReturns the result of dividing the extended double-precision floating-point
3804129203Scognetvalue `a' by the corresponding value `b'.  The operation is performed
3805129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3806129203Scognet-------------------------------------------------------------------------------
3807129203Scognet*/
3808129203Scognetfloatx80 floatx80_div( floatx80 a, floatx80 b )
3809129203Scognet{
3810129203Scognet    flag aSign, bSign, zSign;
3811129203Scognet    int32 aExp, bExp, zExp;
3812129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
3813129203Scognet    bits64 rem0, rem1, rem2, term0, term1, term2;
3814129203Scognet    floatx80 z;
3815129203Scognet
3816129203Scognet    aSig = extractFloatx80Frac( a );
3817129203Scognet    aExp = extractFloatx80Exp( a );
3818129203Scognet    aSign = extractFloatx80Sign( a );
3819129203Scognet    bSig = extractFloatx80Frac( b );
3820129203Scognet    bExp = extractFloatx80Exp( b );
3821129203Scognet    bSign = extractFloatx80Sign( b );
3822129203Scognet    zSign = aSign ^ bSign;
3823129203Scognet    if ( aExp == 0x7FFF ) {
3824129203Scognet        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3825129203Scognet        if ( bExp == 0x7FFF ) {
3826129203Scognet            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3827129203Scognet            goto invalid;
3828129203Scognet        }
3829129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3830129203Scognet    }
3831129203Scognet    if ( bExp == 0x7FFF ) {
3832129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3833129203Scognet        return packFloatx80( zSign, 0, 0 );
3834129203Scognet    }
3835129203Scognet    if ( bExp == 0 ) {
3836129203Scognet        if ( bSig == 0 ) {
3837129203Scognet            if ( ( aExp | aSig ) == 0 ) {
3838129203Scognet invalid:
3839129203Scognet                float_raise( float_flag_invalid );
3840129203Scognet                z.low = floatx80_default_nan_low;
3841129203Scognet                z.high = floatx80_default_nan_high;
3842129203Scognet                return z;
3843129203Scognet            }
3844129203Scognet            float_raise( float_flag_divbyzero );
3845129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3846129203Scognet        }
3847129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3848129203Scognet    }
3849129203Scognet    if ( aExp == 0 ) {
3850129203Scognet        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3851129203Scognet        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3852129203Scognet    }
3853129203Scognet    zExp = aExp - bExp + 0x3FFE;
3854129203Scognet    rem1 = 0;
3855129203Scognet    if ( bSig <= aSig ) {
3856129203Scognet        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3857129203Scognet        ++zExp;
3858129203Scognet    }
3859129203Scognet    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3860129203Scognet    mul64To128( bSig, zSig0, &term0, &term1 );
3861129203Scognet    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3862129203Scognet    while ( (sbits64) rem0 < 0 ) {
3863129203Scognet        --zSig0;
3864129203Scognet        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3865129203Scognet    }
3866129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3867129203Scognet    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3868129203Scognet        mul64To128( bSig, zSig1, &term1, &term2 );
3869129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3870129203Scognet        while ( (sbits64) rem1 < 0 ) {
3871129203Scognet            --zSig1;
3872129203Scognet            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3873129203Scognet        }
3874129203Scognet        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3875129203Scognet    }
3876129203Scognet    return
3877129203Scognet        roundAndPackFloatx80(
3878129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3879129203Scognet
3880129203Scognet}
3881129203Scognet
3882129203Scognet/*
3883129203Scognet-------------------------------------------------------------------------------
3884129203ScognetReturns the remainder of the extended double-precision floating-point value
3885129203Scognet`a' with respect to the corresponding value `b'.  The operation is performed
3886129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3887129203Scognet-------------------------------------------------------------------------------
3888129203Scognet*/
3889129203Scognetfloatx80 floatx80_rem( floatx80 a, floatx80 b )
3890129203Scognet{
3891129203Scognet    flag aSign, bSign, zSign;
3892129203Scognet    int32 aExp, bExp, expDiff;
3893129203Scognet    bits64 aSig0, aSig1, bSig;
3894129203Scognet    bits64 q, term0, term1, alternateASig0, alternateASig1;
3895129203Scognet    floatx80 z;
3896129203Scognet
3897129203Scognet    aSig0 = extractFloatx80Frac( a );
3898129203Scognet    aExp = extractFloatx80Exp( a );
3899129203Scognet    aSign = extractFloatx80Sign( a );
3900129203Scognet    bSig = extractFloatx80Frac( b );
3901129203Scognet    bExp = extractFloatx80Exp( b );
3902129203Scognet    bSign = extractFloatx80Sign( b );
3903129203Scognet    if ( aExp == 0x7FFF ) {
3904129203Scognet        if (    (bits64) ( aSig0<<1 )
3905129203Scognet             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3906129203Scognet            return propagateFloatx80NaN( a, b );
3907129203Scognet        }
3908129203Scognet        goto invalid;
3909129203Scognet    }
3910129203Scognet    if ( bExp == 0x7FFF ) {
3911129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3912129203Scognet        return a;
3913129203Scognet    }
3914129203Scognet    if ( bExp == 0 ) {
3915129203Scognet        if ( bSig == 0 ) {
3916129203Scognet invalid:
3917129203Scognet            float_raise( float_flag_invalid );
3918129203Scognet            z.low = floatx80_default_nan_low;
3919129203Scognet            z.high = floatx80_default_nan_high;
3920129203Scognet            return z;
3921129203Scognet        }
3922129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3923129203Scognet    }
3924129203Scognet    if ( aExp == 0 ) {
3925129203Scognet        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3926129203Scognet        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3927129203Scognet    }
3928129203Scognet    bSig |= LIT64( 0x8000000000000000 );
3929129203Scognet    zSign = aSign;
3930129203Scognet    expDiff = aExp - bExp;
3931129203Scognet    aSig1 = 0;
3932129203Scognet    if ( expDiff < 0 ) {
3933129203Scognet        if ( expDiff < -1 ) return a;
3934129203Scognet        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3935129203Scognet        expDiff = 0;
3936129203Scognet    }
3937129203Scognet    q = ( bSig <= aSig0 );
3938129203Scognet    if ( q ) aSig0 -= bSig;
3939129203Scognet    expDiff -= 64;
3940129203Scognet    while ( 0 < expDiff ) {
3941129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig );
3942129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
3943129203Scognet        mul64To128( bSig, q, &term0, &term1 );
3944129203Scognet        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3945129203Scognet        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3946129203Scognet        expDiff -= 62;
3947129203Scognet    }
3948129203Scognet    expDiff += 64;
3949129203Scognet    if ( 0 < expDiff ) {
3950129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig );
3951129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
3952129203Scognet        q >>= 64 - expDiff;
3953129203Scognet        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
3954129203Scognet        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3955129203Scognet        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3956129203Scognet        while ( le128( term0, term1, aSig0, aSig1 ) ) {
3957129203Scognet            ++q;
3958129203Scognet            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3959129203Scognet        }
3960129203Scognet    }
3961129203Scognet    else {
3962129203Scognet        term1 = 0;
3963129203Scognet        term0 = bSig;
3964129203Scognet    }
3965129203Scognet    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
3966129203Scognet    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
3967129203Scognet         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
3968129203Scognet              && ( q & 1 ) )
3969129203Scognet       ) {
3970129203Scognet        aSig0 = alternateASig0;
3971129203Scognet        aSig1 = alternateASig1;
3972129203Scognet        zSign = ! zSign;
3973129203Scognet    }
3974129203Scognet    return
3975129203Scognet        normalizeRoundAndPackFloatx80(
3976129203Scognet            80, zSign, bExp + expDiff, aSig0, aSig1 );
3977129203Scognet
3978129203Scognet}
3979129203Scognet
3980129203Scognet/*
3981129203Scognet-------------------------------------------------------------------------------
3982129203ScognetReturns the square root of the extended double-precision floating-point
3983129203Scognetvalue `a'.  The operation is performed according to the IEC/IEEE Standard
3984129203Scognetfor Binary Floating-Point Arithmetic.
3985129203Scognet-------------------------------------------------------------------------------
3986129203Scognet*/
3987129203Scognetfloatx80 floatx80_sqrt( floatx80 a )
3988129203Scognet{
3989129203Scognet    flag aSign;
3990129203Scognet    int32 aExp, zExp;
3991129203Scognet    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3992129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3993129203Scognet    floatx80 z;
3994129203Scognet
3995129203Scognet    aSig0 = extractFloatx80Frac( a );
3996129203Scognet    aExp = extractFloatx80Exp( a );
3997129203Scognet    aSign = extractFloatx80Sign( a );
3998129203Scognet    if ( aExp == 0x7FFF ) {
3999129203Scognet        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4000129203Scognet        if ( ! aSign ) return a;
4001129203Scognet        goto invalid;
4002129203Scognet    }
4003129203Scognet    if ( aSign ) {
4004129203Scognet        if ( ( aExp | aSig0 ) == 0 ) return a;
4005129203Scognet invalid:
4006129203Scognet        float_raise( float_flag_invalid );
4007129203Scognet        z.low = floatx80_default_nan_low;
4008129203Scognet        z.high = floatx80_default_nan_high;
4009129203Scognet        return z;
4010129203Scognet    }
4011129203Scognet    if ( aExp == 0 ) {
4012129203Scognet        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4013129203Scognet        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4014129203Scognet    }
4015129203Scognet    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4016129203Scognet    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4017129203Scognet    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4018129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4019129203Scognet    doubleZSig0 = zSig0<<1;
4020129203Scognet    mul64To128( zSig0, zSig0, &term0, &term1 );
4021129203Scognet    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4022129203Scognet    while ( (sbits64) rem0 < 0 ) {
4023129203Scognet        --zSig0;
4024129203Scognet        doubleZSig0 -= 2;
4025129203Scognet        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4026129203Scognet    }
4027129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4028129203Scognet    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4029129203Scognet        if ( zSig1 == 0 ) zSig1 = 1;
4030129203Scognet        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4031129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4032129203Scognet        mul64To128( zSig1, zSig1, &term2, &term3 );
4033129203Scognet        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4034129203Scognet        while ( (sbits64) rem1 < 0 ) {
4035129203Scognet            --zSig1;
4036129203Scognet            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4037129203Scognet            term3 |= 1;
4038129203Scognet            term2 |= doubleZSig0;
4039129203Scognet            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4040129203Scognet        }
4041129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4042129203Scognet    }
4043129203Scognet    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4044129203Scognet    zSig0 |= doubleZSig0;
4045129203Scognet    return
4046129203Scognet        roundAndPackFloatx80(
4047129203Scognet            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4048129203Scognet
4049129203Scognet}
4050129203Scognet
4051129203Scognet/*
4052129203Scognet-------------------------------------------------------------------------------
4053129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
4054129203Scognetequal to the corresponding value `b', and 0 otherwise.  The comparison is
4055129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
4056129203ScognetArithmetic.
4057129203Scognet-------------------------------------------------------------------------------
4058129203Scognet*/
4059129203Scognetflag floatx80_eq( floatx80 a, floatx80 b )
4060129203Scognet{
4061129203Scognet
4062129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4063129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4064129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4065129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4066129203Scognet       ) {
4067129203Scognet        if (    floatx80_is_signaling_nan( a )
4068129203Scognet             || floatx80_is_signaling_nan( b ) ) {
4069129203Scognet            float_raise( float_flag_invalid );
4070129203Scognet        }
4071129203Scognet        return 0;
4072129203Scognet    }
4073129203Scognet    return
4074129203Scognet           ( a.low == b.low )
4075129203Scognet        && (    ( a.high == b.high )
4076129203Scognet             || (    ( a.low == 0 )
4077129203Scognet                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4078129203Scognet           );
4079129203Scognet
4080129203Scognet}
4081129203Scognet
4082129203Scognet/*
4083129203Scognet-------------------------------------------------------------------------------
4084129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
4085129203Scognetless than or equal to the corresponding value `b', and 0 otherwise.  The
4086129203Scognetcomparison is performed according to the IEC/IEEE Standard for Binary
4087129203ScognetFloating-Point Arithmetic.
4088129203Scognet-------------------------------------------------------------------------------
4089129203Scognet*/
4090129203Scognetflag floatx80_le( floatx80 a, floatx80 b )
4091129203Scognet{
4092129203Scognet    flag aSign, bSign;
4093129203Scognet
4094129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4095129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4096129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4097129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4098129203Scognet       ) {
4099129203Scognet        float_raise( float_flag_invalid );
4100129203Scognet        return 0;
4101129203Scognet    }
4102129203Scognet    aSign = extractFloatx80Sign( a );
4103129203Scognet    bSign = extractFloatx80Sign( b );
4104129203Scognet    if ( aSign != bSign ) {
4105129203Scognet        return
4106129203Scognet               aSign
4107129203Scognet            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4108129203Scognet                 == 0 );
4109129203Scognet    }
4110129203Scognet    return
4111129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
4112129203Scognet        : le128( a.high, a.low, b.high, b.low );
4113129203Scognet
4114129203Scognet}
4115129203Scognet
4116129203Scognet/*
4117129203Scognet-------------------------------------------------------------------------------
4118129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
4119129203Scognetless than the corresponding value `b', and 0 otherwise.  The comparison
4120129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4121129203ScognetArithmetic.
4122129203Scognet-------------------------------------------------------------------------------
4123129203Scognet*/
4124129203Scognetflag floatx80_lt( floatx80 a, floatx80 b )
4125129203Scognet{
4126129203Scognet    flag aSign, bSign;
4127129203Scognet
4128129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4129129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4130129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4131129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4132129203Scognet       ) {
4133129203Scognet        float_raise( float_flag_invalid );
4134129203Scognet        return 0;
4135129203Scognet    }
4136129203Scognet    aSign = extractFloatx80Sign( a );
4137129203Scognet    bSign = extractFloatx80Sign( b );
4138129203Scognet    if ( aSign != bSign ) {
4139129203Scognet        return
4140129203Scognet               aSign
4141129203Scognet            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4142129203Scognet                 != 0 );
4143129203Scognet    }
4144129203Scognet    return
4145129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
4146129203Scognet        : lt128( a.high, a.low, b.high, b.low );
4147129203Scognet
4148129203Scognet}
4149129203Scognet
4150129203Scognet/*
4151129203Scognet-------------------------------------------------------------------------------
4152129203ScognetReturns 1 if the extended double-precision floating-point value `a' is equal
4153129203Scognetto the corresponding value `b', and 0 otherwise.  The invalid exception is
4154129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
4155129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4156129203Scognet-------------------------------------------------------------------------------
4157129203Scognet*/
4158129203Scognetflag floatx80_eq_signaling( floatx80 a, floatx80 b )
4159129203Scognet{
4160129203Scognet
4161129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4162129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4163129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4164129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4165129203Scognet       ) {
4166129203Scognet        float_raise( float_flag_invalid );
4167129203Scognet        return 0;
4168129203Scognet    }
4169129203Scognet    return
4170129203Scognet           ( a.low == b.low )
4171129203Scognet        && (    ( a.high == b.high )
4172129203Scognet             || (    ( a.low == 0 )
4173129203Scognet                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4174129203Scognet           );
4175129203Scognet
4176129203Scognet}
4177129203Scognet
4178129203Scognet/*
4179129203Scognet-------------------------------------------------------------------------------
4180129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less
4181129203Scognetthan or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4182129203Scognetdo not cause an exception.  Otherwise, the comparison is performed according
4183129203Scognetto the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4184129203Scognet-------------------------------------------------------------------------------
4185129203Scognet*/
4186129203Scognetflag floatx80_le_quiet( floatx80 a, floatx80 b )
4187129203Scognet{
4188129203Scognet    flag aSign, bSign;
4189129203Scognet
4190129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4191129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4192129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4193129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4194129203Scognet       ) {
4195129203Scognet        if (    floatx80_is_signaling_nan( a )
4196129203Scognet             || floatx80_is_signaling_nan( b ) ) {
4197129203Scognet            float_raise( float_flag_invalid );
4198129203Scognet        }
4199129203Scognet        return 0;
4200129203Scognet    }
4201129203Scognet    aSign = extractFloatx80Sign( a );
4202129203Scognet    bSign = extractFloatx80Sign( b );
4203129203Scognet    if ( aSign != bSign ) {
4204129203Scognet        return
4205129203Scognet               aSign
4206129203Scognet            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4207129203Scognet                 == 0 );
4208129203Scognet    }
4209129203Scognet    return
4210129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
4211129203Scognet        : le128( a.high, a.low, b.high, b.low );
4212129203Scognet
4213129203Scognet}
4214129203Scognet
4215129203Scognet/*
4216129203Scognet-------------------------------------------------------------------------------
4217129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less
4218129203Scognetthan the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4219129203Scognetan exception.  Otherwise, the comparison is performed according to the
4220129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
4221129203Scognet-------------------------------------------------------------------------------
4222129203Scognet*/
4223129203Scognetflag floatx80_lt_quiet( floatx80 a, floatx80 b )
4224129203Scognet{
4225129203Scognet    flag aSign, bSign;
4226129203Scognet
4227129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4228129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4229129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4230129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4231129203Scognet       ) {
4232129203Scognet        if (    floatx80_is_signaling_nan( a )
4233129203Scognet             || floatx80_is_signaling_nan( b ) ) {
4234129203Scognet            float_raise( float_flag_invalid );
4235129203Scognet        }
4236129203Scognet        return 0;
4237129203Scognet    }
4238129203Scognet    aSign = extractFloatx80Sign( a );
4239129203Scognet    bSign = extractFloatx80Sign( b );
4240129203Scognet    if ( aSign != bSign ) {
4241129203Scognet        return
4242129203Scognet               aSign
4243129203Scognet            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4244129203Scognet                 != 0 );
4245129203Scognet    }
4246129203Scognet    return
4247129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
4248129203Scognet        : lt128( a.high, a.low, b.high, b.low );
4249129203Scognet
4250129203Scognet}
4251129203Scognet
4252129203Scognet#endif
4253129203Scognet
4254129203Scognet#ifdef FLOAT128
4255129203Scognet
4256129203Scognet/*
4257129203Scognet-------------------------------------------------------------------------------
4258129203ScognetReturns the result of converting the quadruple-precision floating-point
4259129203Scognetvalue `a' to the 32-bit two's complement integer format.  The conversion
4260129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4261129203ScognetArithmetic---which means in particular that the conversion is rounded
4262129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
4263129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
4264129203Scognetlargest integer with the same sign as `a' is returned.
4265129203Scognet-------------------------------------------------------------------------------
4266129203Scognet*/
4267129203Scognetint32 float128_to_int32( float128 a )
4268129203Scognet{
4269129203Scognet    flag aSign;
4270129203Scognet    int32 aExp, shiftCount;
4271129203Scognet    bits64 aSig0, aSig1;
4272129203Scognet
4273129203Scognet    aSig1 = extractFloat128Frac1( a );
4274129203Scognet    aSig0 = extractFloat128Frac0( a );
4275129203Scognet    aExp = extractFloat128Exp( a );
4276129203Scognet    aSign = extractFloat128Sign( a );
4277129203Scognet    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4278129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4279129203Scognet    aSig0 |= ( aSig1 != 0 );
4280129203Scognet    shiftCount = 0x4028 - aExp;
4281129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4282129203Scognet    return roundAndPackInt32( aSign, aSig0 );
4283129203Scognet
4284129203Scognet}
4285129203Scognet
4286129203Scognet/*
4287129203Scognet-------------------------------------------------------------------------------
4288129203ScognetReturns the result of converting the quadruple-precision floating-point
4289129203Scognetvalue `a' to the 32-bit two's complement integer format.  The conversion
4290129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4291129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
4292129203Scognet`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4293129203Scognetconversion overflows, the largest integer with the same sign as `a' is
4294129203Scognetreturned.
4295129203Scognet-------------------------------------------------------------------------------
4296129203Scognet*/
4297129203Scognetint32 float128_to_int32_round_to_zero( float128 a )
4298129203Scognet{
4299129203Scognet    flag aSign;
4300129203Scognet    int32 aExp, shiftCount;
4301129203Scognet    bits64 aSig0, aSig1, savedASig;
4302129203Scognet    int32 z;
4303129203Scognet
4304129203Scognet    aSig1 = extractFloat128Frac1( a );
4305129203Scognet    aSig0 = extractFloat128Frac0( a );
4306129203Scognet    aExp = extractFloat128Exp( a );
4307129203Scognet    aSign = extractFloat128Sign( a );
4308129203Scognet    aSig0 |= ( aSig1 != 0 );
4309129203Scognet    if ( 0x401E < aExp ) {
4310129203Scognet        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4311129203Scognet        goto invalid;
4312129203Scognet    }
4313129203Scognet    else if ( aExp < 0x3FFF ) {
4314129203Scognet        if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
4315129203Scognet        return 0;
4316129203Scognet    }
4317129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
4318129203Scognet    shiftCount = 0x402F - aExp;
4319129203Scognet    savedASig = aSig0;
4320129203Scognet    aSig0 >>= shiftCount;
4321129203Scognet    z = aSig0;
4322129203Scognet    if ( aSign ) z = - z;
4323129203Scognet    if ( ( z < 0 ) ^ aSign ) {
4324129203Scognet invalid:
4325129203Scognet        float_raise( float_flag_invalid );
4326129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4327129203Scognet    }
4328129203Scognet    if ( ( aSig0<<shiftCount ) != savedASig ) {
4329129203Scognet        float_exception_flags |= float_flag_inexact;
4330129203Scognet    }
4331129203Scognet    return z;
4332129203Scognet
4333129203Scognet}
4334129203Scognet
4335129203Scognet/*
4336129203Scognet-------------------------------------------------------------------------------
4337129203ScognetReturns the result of converting the quadruple-precision floating-point
4338129203Scognetvalue `a' to the 64-bit two's complement integer format.  The conversion
4339129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4340129203ScognetArithmetic---which means in particular that the conversion is rounded
4341129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
4342129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
4343129203Scognetlargest integer with the same sign as `a' is returned.
4344129203Scognet-------------------------------------------------------------------------------
4345129203Scognet*/
4346129203Scognetint64 float128_to_int64( float128 a )
4347129203Scognet{
4348129203Scognet    flag aSign;
4349129203Scognet    int32 aExp, shiftCount;
4350129203Scognet    bits64 aSig0, aSig1;
4351129203Scognet
4352129203Scognet    aSig1 = extractFloat128Frac1( a );
4353129203Scognet    aSig0 = extractFloat128Frac0( a );
4354129203Scognet    aExp = extractFloat128Exp( a );
4355129203Scognet    aSign = extractFloat128Sign( a );
4356129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4357129203Scognet    shiftCount = 0x402F - aExp;
4358129203Scognet    if ( shiftCount <= 0 ) {
4359129203Scognet        if ( 0x403E < aExp ) {
4360129203Scognet            float_raise( float_flag_invalid );
4361129203Scognet            if (    ! aSign
4362129203Scognet                 || (    ( aExp == 0x7FFF )
4363129203Scognet                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4364129203Scognet                    )
4365129203Scognet               ) {
4366129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
4367129203Scognet            }
4368129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
4369129203Scognet        }
4370129203Scognet        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4371129203Scognet    }
4372129203Scognet    else {
4373129203Scognet        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4374129203Scognet    }
4375129203Scognet    return roundAndPackInt64( aSign, aSig0, aSig1 );
4376129203Scognet
4377129203Scognet}
4378129203Scognet
4379129203Scognet/*
4380129203Scognet-------------------------------------------------------------------------------
4381129203ScognetReturns the result of converting the quadruple-precision floating-point
4382129203Scognetvalue `a' to the 64-bit two's complement integer format.  The conversion
4383129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4384129203ScognetArithmetic, except that the conversion is always rounded toward zero.
4385129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4386129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
4387129203Scognetreturned.
4388129203Scognet-------------------------------------------------------------------------------
4389129203Scognet*/
4390129203Scognetint64 float128_to_int64_round_to_zero( float128 a )
4391129203Scognet{
4392129203Scognet    flag aSign;
4393129203Scognet    int32 aExp, shiftCount;
4394129203Scognet    bits64 aSig0, aSig1;
4395129203Scognet    int64 z;
4396129203Scognet
4397129203Scognet    aSig1 = extractFloat128Frac1( a );
4398129203Scognet    aSig0 = extractFloat128Frac0( a );
4399129203Scognet    aExp = extractFloat128Exp( a );
4400129203Scognet    aSign = extractFloat128Sign( a );
4401129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4402129203Scognet    shiftCount = aExp - 0x402F;
4403129203Scognet    if ( 0 < shiftCount ) {
4404129203Scognet        if ( 0x403E <= aExp ) {
4405129203Scognet            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4406129203Scognet            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4407129203Scognet                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4408129203Scognet                if ( aSig1 ) float_exception_flags |= float_flag_inexact;
4409129203Scognet            }
4410129203Scognet            else {
4411129203Scognet                float_raise( float_flag_invalid );
4412129203Scognet                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4413129203Scognet                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4414129203Scognet                }
4415129203Scognet            }
4416129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
4417129203Scognet        }
4418129203Scognet        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4419129203Scognet        if ( (bits64) ( aSig1<<shiftCount ) ) {
4420129203Scognet            float_exception_flags |= float_flag_inexact;
4421129203Scognet        }
4422129203Scognet    }
4423129203Scognet    else {
4424129203Scognet        if ( aExp < 0x3FFF ) {
4425129203Scognet            if ( aExp | aSig0 | aSig1 ) {
4426129203Scognet                float_exception_flags |= float_flag_inexact;
4427129203Scognet            }
4428129203Scognet            return 0;
4429129203Scognet        }
4430129203Scognet        z = aSig0>>( - shiftCount );
4431129203Scognet        if (    aSig1
4432129203Scognet             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4433129203Scognet            float_exception_flags |= float_flag_inexact;
4434129203Scognet        }
4435129203Scognet    }
4436129203Scognet    if ( aSign ) z = - z;
4437129203Scognet    return z;
4438129203Scognet
4439129203Scognet}
4440129203Scognet
4441129203Scognet/*
4442129203Scognet-------------------------------------------------------------------------------
4443129203ScognetReturns the result of converting the quadruple-precision floating-point
4444129203Scognetvalue `a' to the single-precision floating-point format.  The conversion
4445129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4446129203ScognetArithmetic.
4447129203Scognet-------------------------------------------------------------------------------
4448129203Scognet*/
4449129203Scognetfloat32 float128_to_float32( float128 a )
4450129203Scognet{
4451129203Scognet    flag aSign;
4452129203Scognet    int32 aExp;
4453129203Scognet    bits64 aSig0, aSig1;
4454129203Scognet    bits32 zSig;
4455129203Scognet
4456129203Scognet    aSig1 = extractFloat128Frac1( a );
4457129203Scognet    aSig0 = extractFloat128Frac0( a );
4458129203Scognet    aExp = extractFloat128Exp( a );
4459129203Scognet    aSign = extractFloat128Sign( a );
4460129203Scognet    if ( aExp == 0x7FFF ) {
4461129203Scognet        if ( aSig0 | aSig1 ) {
4462129203Scognet            return commonNaNToFloat32( float128ToCommonNaN( a ) );
4463129203Scognet        }
4464129203Scognet        return packFloat32( aSign, 0xFF, 0 );
4465129203Scognet    }
4466129203Scognet    aSig0 |= ( aSig1 != 0 );
4467129203Scognet    shift64RightJamming( aSig0, 18, &aSig0 );
4468129203Scognet    zSig = aSig0;
4469129203Scognet    if ( aExp || zSig ) {
4470129203Scognet        zSig |= 0x40000000;
4471129203Scognet        aExp -= 0x3F81;
4472129203Scognet    }
4473129203Scognet    return roundAndPackFloat32( aSign, aExp, zSig );
4474129203Scognet
4475129203Scognet}
4476129203Scognet
4477129203Scognet/*
4478129203Scognet-------------------------------------------------------------------------------
4479129203ScognetReturns the result of converting the quadruple-precision floating-point
4480129203Scognetvalue `a' to the double-precision floating-point format.  The conversion
4481129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
4482129203ScognetArithmetic.
4483129203Scognet-------------------------------------------------------------------------------
4484129203Scognet*/
4485129203Scognetfloat64 float128_to_float64( float128 a )
4486129203Scognet{
4487129203Scognet    flag aSign;
4488129203Scognet    int32 aExp;
4489129203Scognet    bits64 aSig0, aSig1;
4490129203Scognet
4491129203Scognet    aSig1 = extractFloat128Frac1( a );
4492129203Scognet    aSig0 = extractFloat128Frac0( a );
4493129203Scognet    aExp = extractFloat128Exp( a );
4494129203Scognet    aSign = extractFloat128Sign( a );
4495129203Scognet    if ( aExp == 0x7FFF ) {
4496129203Scognet        if ( aSig0 | aSig1 ) {
4497129203Scognet            return commonNaNToFloat64( float128ToCommonNaN( a ) );
4498129203Scognet        }
4499129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
4500129203Scognet    }
4501129203Scognet    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4502129203Scognet    aSig0 |= ( aSig1 != 0 );
4503129203Scognet    if ( aExp || aSig0 ) {
4504129203Scognet        aSig0 |= LIT64( 0x4000000000000000 );
4505129203Scognet        aExp -= 0x3C01;
4506129203Scognet    }
4507129203Scognet    return roundAndPackFloat64( aSign, aExp, aSig0 );
4508129203Scognet
4509129203Scognet}
4510129203Scognet
4511129203Scognet#ifdef FLOATX80
4512129203Scognet
4513129203Scognet/*
4514129203Scognet-------------------------------------------------------------------------------
4515129203ScognetReturns the result of converting the quadruple-precision floating-point
4516129203Scognetvalue `a' to the extended double-precision floating-point format.  The
4517129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
4518129203ScognetFloating-Point Arithmetic.
4519129203Scognet-------------------------------------------------------------------------------
4520129203Scognet*/
4521129203Scognetfloatx80 float128_to_floatx80( float128 a )
4522129203Scognet{
4523129203Scognet    flag aSign;
4524129203Scognet    int32 aExp;
4525129203Scognet    bits64 aSig0, aSig1;
4526129203Scognet
4527129203Scognet    aSig1 = extractFloat128Frac1( a );
4528129203Scognet    aSig0 = extractFloat128Frac0( a );
4529129203Scognet    aExp = extractFloat128Exp( a );
4530129203Scognet    aSign = extractFloat128Sign( a );
4531129203Scognet    if ( aExp == 0x7FFF ) {
4532129203Scognet        if ( aSig0 | aSig1 ) {
4533129203Scognet            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4534129203Scognet        }
4535129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4536129203Scognet    }
4537129203Scognet    if ( aExp == 0 ) {
4538129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4539129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4540129203Scognet    }
4541129203Scognet    else {
4542129203Scognet        aSig0 |= LIT64( 0x0001000000000000 );
4543129203Scognet    }
4544129203Scognet    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4545129203Scognet    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4546129203Scognet
4547129203Scognet}
4548129203Scognet
4549129203Scognet#endif
4550129203Scognet
4551129203Scognet/*
4552129203Scognet-------------------------------------------------------------------------------
4553129203ScognetRounds the quadruple-precision floating-point value `a' to an integer, and
4554129203Scognetreturns the result as a quadruple-precision floating-point value.  The
4555129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
4556129203ScognetFloating-Point Arithmetic.
4557129203Scognet-------------------------------------------------------------------------------
4558129203Scognet*/
4559129203Scognetfloat128 float128_round_to_int( float128 a )
4560129203Scognet{
4561129203Scognet    flag aSign;
4562129203Scognet    int32 aExp;
4563129203Scognet    bits64 lastBitMask, roundBitsMask;
4564129203Scognet    int8 roundingMode;
4565129203Scognet    float128 z;
4566129203Scognet
4567129203Scognet    aExp = extractFloat128Exp( a );
4568129203Scognet    if ( 0x402F <= aExp ) {
4569129203Scognet        if ( 0x406F <= aExp ) {
4570129203Scognet            if (    ( aExp == 0x7FFF )
4571129203Scognet                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4572129203Scognet               ) {
4573129203Scognet                return propagateFloat128NaN( a, a );
4574129203Scognet            }
4575129203Scognet            return a;
4576129203Scognet        }
4577129203Scognet        lastBitMask = 1;
4578129203Scognet        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4579129203Scognet        roundBitsMask = lastBitMask - 1;
4580129203Scognet        z = a;
4581129203Scognet        roundingMode = float_rounding_mode;
4582129203Scognet        if ( roundingMode == float_round_nearest_even ) {
4583129203Scognet            if ( lastBitMask ) {
4584129203Scognet                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4585129203Scognet                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4586129203Scognet            }
4587129203Scognet            else {
4588129203Scognet                if ( (sbits64) z.low < 0 ) {
4589129203Scognet                    ++z.high;
4590129203Scognet                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4591129203Scognet                }
4592129203Scognet            }
4593129203Scognet        }
4594129203Scognet        else if ( roundingMode != float_round_to_zero ) {
4595129203Scognet            if (   extractFloat128Sign( z )
4596129203Scognet                 ^ ( roundingMode == float_round_up ) ) {
4597129203Scognet                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4598129203Scognet            }
4599129203Scognet        }
4600129203Scognet        z.low &= ~ roundBitsMask;
4601129203Scognet    }
4602129203Scognet    else {
4603129203Scognet        if ( aExp < 0x3FFF ) {
4604129203Scognet            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4605129203Scognet            float_exception_flags |= float_flag_inexact;
4606129203Scognet            aSign = extractFloat128Sign( a );
4607129203Scognet            switch ( float_rounding_mode ) {
4608129203Scognet             case float_round_nearest_even:
4609129203Scognet                if (    ( aExp == 0x3FFE )
4610129203Scognet                     && (   extractFloat128Frac0( a )
4611129203Scognet                          | extractFloat128Frac1( a ) )
4612129203Scognet                   ) {
4613129203Scognet                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4614129203Scognet                }
4615129203Scognet                break;
4616129203Scognet	     case float_round_to_zero:
4617129203Scognet		break;
4618129203Scognet             case float_round_down:
4619129203Scognet                return
4620129203Scognet                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4621129203Scognet                    : packFloat128( 0, 0, 0, 0 );
4622129203Scognet             case float_round_up:
4623129203Scognet                return
4624129203Scognet                      aSign ? packFloat128( 1, 0, 0, 0 )
4625129203Scognet                    : packFloat128( 0, 0x3FFF, 0, 0 );
4626129203Scognet            }
4627129203Scognet            return packFloat128( aSign, 0, 0, 0 );
4628129203Scognet        }
4629129203Scognet        lastBitMask = 1;
4630129203Scognet        lastBitMask <<= 0x402F - aExp;
4631129203Scognet        roundBitsMask = lastBitMask - 1;
4632129203Scognet        z.low = 0;
4633129203Scognet        z.high = a.high;
4634129203Scognet        roundingMode = float_rounding_mode;
4635129203Scognet        if ( roundingMode == float_round_nearest_even ) {
4636129203Scognet            z.high += lastBitMask>>1;
4637129203Scognet            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4638129203Scognet                z.high &= ~ lastBitMask;
4639129203Scognet            }
4640129203Scognet        }
4641129203Scognet        else if ( roundingMode != float_round_to_zero ) {
4642129203Scognet            if (   extractFloat128Sign( z )
4643129203Scognet                 ^ ( roundingMode == float_round_up ) ) {
4644129203Scognet                z.high |= ( a.low != 0 );
4645129203Scognet                z.high += roundBitsMask;
4646129203Scognet            }
4647129203Scognet        }
4648129203Scognet        z.high &= ~ roundBitsMask;
4649129203Scognet    }
4650129203Scognet    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4651129203Scognet        float_exception_flags |= float_flag_inexact;
4652129203Scognet    }
4653129203Scognet    return z;
4654129203Scognet
4655129203Scognet}
4656129203Scognet
4657129203Scognet/*
4658129203Scognet-------------------------------------------------------------------------------
4659129203ScognetReturns the result of adding the absolute values of the quadruple-precision
4660129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4661129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
4662129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
4663129203ScognetFloating-Point Arithmetic.
4664129203Scognet-------------------------------------------------------------------------------
4665129203Scognet*/
4666129203Scognetstatic float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4667129203Scognet{
4668129203Scognet    int32 aExp, bExp, zExp;
4669129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4670129203Scognet    int32 expDiff;
4671129203Scognet
4672129203Scognet    aSig1 = extractFloat128Frac1( a );
4673129203Scognet    aSig0 = extractFloat128Frac0( a );
4674129203Scognet    aExp = extractFloat128Exp( a );
4675129203Scognet    bSig1 = extractFloat128Frac1( b );
4676129203Scognet    bSig0 = extractFloat128Frac0( b );
4677129203Scognet    bExp = extractFloat128Exp( b );
4678129203Scognet    expDiff = aExp - bExp;
4679129203Scognet    if ( 0 < expDiff ) {
4680129203Scognet        if ( aExp == 0x7FFF ) {
4681129203Scognet            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4682129203Scognet            return a;
4683129203Scognet        }
4684129203Scognet        if ( bExp == 0 ) {
4685129203Scognet            --expDiff;
4686129203Scognet        }
4687129203Scognet        else {
4688129203Scognet            bSig0 |= LIT64( 0x0001000000000000 );
4689129203Scognet        }
4690129203Scognet        shift128ExtraRightJamming(
4691129203Scognet            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4692129203Scognet        zExp = aExp;
4693129203Scognet    }
4694129203Scognet    else if ( expDiff < 0 ) {
4695129203Scognet        if ( bExp == 0x7FFF ) {
4696129203Scognet            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4697129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
4698129203Scognet        }
4699129203Scognet        if ( aExp == 0 ) {
4700129203Scognet            ++expDiff;
4701129203Scognet        }
4702129203Scognet        else {
4703129203Scognet            aSig0 |= LIT64( 0x0001000000000000 );
4704129203Scognet        }
4705129203Scognet        shift128ExtraRightJamming(
4706129203Scognet            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4707129203Scognet        zExp = bExp;
4708129203Scognet    }
4709129203Scognet    else {
4710129203Scognet        if ( aExp == 0x7FFF ) {
4711129203Scognet            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4712129203Scognet                return propagateFloat128NaN( a, b );
4713129203Scognet            }
4714129203Scognet            return a;
4715129203Scognet        }
4716129203Scognet        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4717129203Scognet        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4718129203Scognet        zSig2 = 0;
4719129203Scognet        zSig0 |= LIT64( 0x0002000000000000 );
4720129203Scognet        zExp = aExp;
4721129203Scognet        goto shiftRight1;
4722129203Scognet    }
4723129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
4724129203Scognet    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4725129203Scognet    --zExp;
4726129203Scognet    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4727129203Scognet    ++zExp;
4728129203Scognet shiftRight1:
4729129203Scognet    shift128ExtraRightJamming(
4730129203Scognet        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4731129203Scognet roundAndPack:
4732129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4733129203Scognet
4734129203Scognet}
4735129203Scognet
4736129203Scognet/*
4737129203Scognet-------------------------------------------------------------------------------
4738129203ScognetReturns the result of subtracting the absolute values of the quadruple-
4739129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
4740129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
4741129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
4742129203ScognetStandard for Binary Floating-Point Arithmetic.
4743129203Scognet-------------------------------------------------------------------------------
4744129203Scognet*/
4745129203Scognetstatic float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4746129203Scognet{
4747129203Scognet    int32 aExp, bExp, zExp;
4748129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4749129203Scognet    int32 expDiff;
4750129203Scognet    float128 z;
4751129203Scognet
4752129203Scognet    aSig1 = extractFloat128Frac1( a );
4753129203Scognet    aSig0 = extractFloat128Frac0( a );
4754129203Scognet    aExp = extractFloat128Exp( a );
4755129203Scognet    bSig1 = extractFloat128Frac1( b );
4756129203Scognet    bSig0 = extractFloat128Frac0( b );
4757129203Scognet    bExp = extractFloat128Exp( b );
4758129203Scognet    expDiff = aExp - bExp;
4759129203Scognet    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4760129203Scognet    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4761129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
4762129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
4763129203Scognet    if ( aExp == 0x7FFF ) {
4764129203Scognet        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4765129203Scognet            return propagateFloat128NaN( a, b );
4766129203Scognet        }
4767129203Scognet        float_raise( float_flag_invalid );
4768129203Scognet        z.low = float128_default_nan_low;
4769129203Scognet        z.high = float128_default_nan_high;
4770129203Scognet        return z;
4771129203Scognet    }
4772129203Scognet    if ( aExp == 0 ) {
4773129203Scognet        aExp = 1;
4774129203Scognet        bExp = 1;
4775129203Scognet    }
4776129203Scognet    if ( bSig0 < aSig0 ) goto aBigger;
4777129203Scognet    if ( aSig0 < bSig0 ) goto bBigger;
4778129203Scognet    if ( bSig1 < aSig1 ) goto aBigger;
4779129203Scognet    if ( aSig1 < bSig1 ) goto bBigger;
4780129203Scognet    return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
4781129203Scognet bExpBigger:
4782129203Scognet    if ( bExp == 0x7FFF ) {
4783129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4784129203Scognet        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4785129203Scognet    }
4786129203Scognet    if ( aExp == 0 ) {
4787129203Scognet        ++expDiff;
4788129203Scognet    }
4789129203Scognet    else {
4790129203Scognet        aSig0 |= LIT64( 0x4000000000000000 );
4791129203Scognet    }
4792129203Scognet    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4793129203Scognet    bSig0 |= LIT64( 0x4000000000000000 );
4794129203Scognet bBigger:
4795129203Scognet    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4796129203Scognet    zExp = bExp;
4797129203Scognet    zSign ^= 1;
4798129203Scognet    goto normalizeRoundAndPack;
4799129203Scognet aExpBigger:
4800129203Scognet    if ( aExp == 0x7FFF ) {
4801129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4802129203Scognet        return a;
4803129203Scognet    }
4804129203Scognet    if ( bExp == 0 ) {
4805129203Scognet        --expDiff;
4806129203Scognet    }
4807129203Scognet    else {
4808129203Scognet        bSig0 |= LIT64( 0x4000000000000000 );
4809129203Scognet    }
4810129203Scognet    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4811129203Scognet    aSig0 |= LIT64( 0x4000000000000000 );
4812129203Scognet aBigger:
4813129203Scognet    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4814129203Scognet    zExp = aExp;
4815129203Scognet normalizeRoundAndPack:
4816129203Scognet    --zExp;
4817129203Scognet    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4818129203Scognet
4819129203Scognet}
4820129203Scognet
4821129203Scognet/*
4822129203Scognet-------------------------------------------------------------------------------
4823129203ScognetReturns the result of adding the quadruple-precision floating-point values
4824129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4825129203Scognetfor Binary Floating-Point Arithmetic.
4826129203Scognet-------------------------------------------------------------------------------
4827129203Scognet*/
4828129203Scognetfloat128 float128_add( float128 a, float128 b )
4829129203Scognet{
4830129203Scognet    flag aSign, bSign;
4831129203Scognet
4832129203Scognet    aSign = extractFloat128Sign( a );
4833129203Scognet    bSign = extractFloat128Sign( b );
4834129203Scognet    if ( aSign == bSign ) {
4835129203Scognet        return addFloat128Sigs( a, b, aSign );
4836129203Scognet    }
4837129203Scognet    else {
4838129203Scognet        return subFloat128Sigs( a, b, aSign );
4839129203Scognet    }
4840129203Scognet
4841129203Scognet}
4842129203Scognet
4843129203Scognet/*
4844129203Scognet-------------------------------------------------------------------------------
4845129203ScognetReturns the result of subtracting the quadruple-precision floating-point
4846129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
4847129203ScognetStandard for Binary Floating-Point Arithmetic.
4848129203Scognet-------------------------------------------------------------------------------
4849129203Scognet*/
4850129203Scognetfloat128 float128_sub( float128 a, float128 b )
4851129203Scognet{
4852129203Scognet    flag aSign, bSign;
4853129203Scognet
4854129203Scognet    aSign = extractFloat128Sign( a );
4855129203Scognet    bSign = extractFloat128Sign( b );
4856129203Scognet    if ( aSign == bSign ) {
4857129203Scognet        return subFloat128Sigs( a, b, aSign );
4858129203Scognet    }
4859129203Scognet    else {
4860129203Scognet        return addFloat128Sigs( a, b, aSign );
4861129203Scognet    }
4862129203Scognet
4863129203Scognet}
4864129203Scognet
4865129203Scognet/*
4866129203Scognet-------------------------------------------------------------------------------
4867129203ScognetReturns the result of multiplying the quadruple-precision floating-point
4868129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
4869129203ScognetStandard for Binary Floating-Point Arithmetic.
4870129203Scognet-------------------------------------------------------------------------------
4871129203Scognet*/
4872129203Scognetfloat128 float128_mul( float128 a, float128 b )
4873129203Scognet{
4874129203Scognet    flag aSign, bSign, zSign;
4875129203Scognet    int32 aExp, bExp, zExp;
4876129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4877129203Scognet    float128 z;
4878129203Scognet
4879129203Scognet    aSig1 = extractFloat128Frac1( a );
4880129203Scognet    aSig0 = extractFloat128Frac0( a );
4881129203Scognet    aExp = extractFloat128Exp( a );
4882129203Scognet    aSign = extractFloat128Sign( a );
4883129203Scognet    bSig1 = extractFloat128Frac1( b );
4884129203Scognet    bSig0 = extractFloat128Frac0( b );
4885129203Scognet    bExp = extractFloat128Exp( b );
4886129203Scognet    bSign = extractFloat128Sign( b );
4887129203Scognet    zSign = aSign ^ bSign;
4888129203Scognet    if ( aExp == 0x7FFF ) {
4889129203Scognet        if (    ( aSig0 | aSig1 )
4890129203Scognet             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4891129203Scognet            return propagateFloat128NaN( a, b );
4892129203Scognet        }
4893129203Scognet        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4894129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
4895129203Scognet    }
4896129203Scognet    if ( bExp == 0x7FFF ) {
4897129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4898129203Scognet        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4899129203Scognet invalid:
4900129203Scognet            float_raise( float_flag_invalid );
4901129203Scognet            z.low = float128_default_nan_low;
4902129203Scognet            z.high = float128_default_nan_high;
4903129203Scognet            return z;
4904129203Scognet        }
4905129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
4906129203Scognet    }
4907129203Scognet    if ( aExp == 0 ) {
4908129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4909129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4910129203Scognet    }
4911129203Scognet    if ( bExp == 0 ) {
4912129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4913129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4914129203Scognet    }
4915129203Scognet    zExp = aExp + bExp - 0x4000;
4916129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
4917129203Scognet    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
4918129203Scognet    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
4919129203Scognet    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
4920129203Scognet    zSig2 |= ( zSig3 != 0 );
4921129203Scognet    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
4922129203Scognet        shift128ExtraRightJamming(
4923129203Scognet            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4924129203Scognet        ++zExp;
4925129203Scognet    }
4926129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4927129203Scognet
4928129203Scognet}
4929129203Scognet
4930129203Scognet/*
4931129203Scognet-------------------------------------------------------------------------------
4932129203ScognetReturns the result of dividing the quadruple-precision floating-point value
4933129203Scognet`a' by the corresponding value `b'.  The operation is performed according to
4934129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4935129203Scognet-------------------------------------------------------------------------------
4936129203Scognet*/
4937129203Scognetfloat128 float128_div( float128 a, float128 b )
4938129203Scognet{
4939129203Scognet    flag aSign, bSign, zSign;
4940129203Scognet    int32 aExp, bExp, zExp;
4941129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4942129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4943129203Scognet    float128 z;
4944129203Scognet
4945129203Scognet    aSig1 = extractFloat128Frac1( a );
4946129203Scognet    aSig0 = extractFloat128Frac0( a );
4947129203Scognet    aExp = extractFloat128Exp( a );
4948129203Scognet    aSign = extractFloat128Sign( a );
4949129203Scognet    bSig1 = extractFloat128Frac1( b );
4950129203Scognet    bSig0 = extractFloat128Frac0( b );
4951129203Scognet    bExp = extractFloat128Exp( b );
4952129203Scognet    bSign = extractFloat128Sign( b );
4953129203Scognet    zSign = aSign ^ bSign;
4954129203Scognet    if ( aExp == 0x7FFF ) {
4955129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4956129203Scognet        if ( bExp == 0x7FFF ) {
4957129203Scognet            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4958129203Scognet            goto invalid;
4959129203Scognet        }
4960129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
4961129203Scognet    }
4962129203Scognet    if ( bExp == 0x7FFF ) {
4963129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4964129203Scognet        return packFloat128( zSign, 0, 0, 0 );
4965129203Scognet    }
4966129203Scognet    if ( bExp == 0 ) {
4967129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) {
4968129203Scognet            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4969129203Scognet invalid:
4970129203Scognet                float_raise( float_flag_invalid );
4971129203Scognet                z.low = float128_default_nan_low;
4972129203Scognet                z.high = float128_default_nan_high;
4973129203Scognet                return z;
4974129203Scognet            }
4975129203Scognet            float_raise( float_flag_divbyzero );
4976129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
4977129203Scognet        }
4978129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
4979129203Scognet    }
4980129203Scognet    if ( aExp == 0 ) {
4981129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
4982129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4983129203Scognet    }
4984129203Scognet    zExp = aExp - bExp + 0x3FFD;
4985129203Scognet    shortShift128Left(
4986129203Scognet        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
4987129203Scognet    shortShift128Left(
4988129203Scognet        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
4989129203Scognet    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
4990129203Scognet        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
4991129203Scognet        ++zExp;
4992129203Scognet    }
4993129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
4994129203Scognet    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
4995129203Scognet    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
4996129203Scognet    while ( (sbits64) rem0 < 0 ) {
4997129203Scognet        --zSig0;
4998129203Scognet        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
4999129203Scognet    }
5000129203Scognet    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5001129203Scognet    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5002129203Scognet        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5003129203Scognet        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5004129203Scognet        while ( (sbits64) rem1 < 0 ) {
5005129203Scognet            --zSig1;
5006129203Scognet            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5007129203Scognet        }
5008129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5009129203Scognet    }
5010129203Scognet    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5011129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5012129203Scognet
5013129203Scognet}
5014129203Scognet
5015129203Scognet/*
5016129203Scognet-------------------------------------------------------------------------------
5017129203ScognetReturns the remainder of the quadruple-precision floating-point value `a'
5018129203Scognetwith respect to the corresponding value `b'.  The operation is performed
5019129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5020129203Scognet-------------------------------------------------------------------------------
5021129203Scognet*/
5022129203Scognetfloat128 float128_rem( float128 a, float128 b )
5023129203Scognet{
5024129203Scognet    flag aSign, bSign, zSign;
5025129203Scognet    int32 aExp, bExp, expDiff;
5026129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5027129203Scognet    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5028129203Scognet    sbits64 sigMean0;
5029129203Scognet    float128 z;
5030129203Scognet
5031129203Scognet    aSig1 = extractFloat128Frac1( a );
5032129203Scognet    aSig0 = extractFloat128Frac0( a );
5033129203Scognet    aExp = extractFloat128Exp( a );
5034129203Scognet    aSign = extractFloat128Sign( a );
5035129203Scognet    bSig1 = extractFloat128Frac1( b );
5036129203Scognet    bSig0 = extractFloat128Frac0( b );
5037129203Scognet    bExp = extractFloat128Exp( b );
5038129203Scognet    bSign = extractFloat128Sign( b );
5039129203Scognet    if ( aExp == 0x7FFF ) {
5040129203Scognet        if (    ( aSig0 | aSig1 )
5041129203Scognet             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5042129203Scognet            return propagateFloat128NaN( a, b );
5043129203Scognet        }
5044129203Scognet        goto invalid;
5045129203Scognet    }
5046129203Scognet    if ( bExp == 0x7FFF ) {
5047129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5048129203Scognet        return a;
5049129203Scognet    }
5050129203Scognet    if ( bExp == 0 ) {
5051129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) {
5052129203Scognet invalid:
5053129203Scognet            float_raise( float_flag_invalid );
5054129203Scognet            z.low = float128_default_nan_low;
5055129203Scognet            z.high = float128_default_nan_high;
5056129203Scognet            return z;
5057129203Scognet        }
5058129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5059129203Scognet    }
5060129203Scognet    if ( aExp == 0 ) {
5061129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5062129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5063129203Scognet    }
5064129203Scognet    expDiff = aExp - bExp;
5065129203Scognet    if ( expDiff < -1 ) return a;
5066129203Scognet    shortShift128Left(
5067129203Scognet        aSig0 | LIT64( 0x0001000000000000 ),
5068129203Scognet        aSig1,
5069129203Scognet        15 - ( expDiff < 0 ),
5070129203Scognet        &aSig0,
5071129203Scognet        &aSig1
5072129203Scognet    );
5073129203Scognet    shortShift128Left(
5074129203Scognet        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5075129203Scognet    q = le128( bSig0, bSig1, aSig0, aSig1 );
5076129203Scognet    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5077129203Scognet    expDiff -= 64;
5078129203Scognet    while ( 0 < expDiff ) {
5079129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5080129203Scognet        q = ( 4 < q ) ? q - 4 : 0;
5081129203Scognet        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5082129203Scognet        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5083129203Scognet        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5084129203Scognet        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5085129203Scognet        expDiff -= 61;
5086129203Scognet    }
5087129203Scognet    if ( -64 < expDiff ) {
5088129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5089129203Scognet        q = ( 4 < q ) ? q - 4 : 0;
5090129203Scognet        q >>= - expDiff;
5091129203Scognet        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5092129203Scognet        expDiff += 52;
5093129203Scognet        if ( expDiff < 0 ) {
5094129203Scognet            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5095129203Scognet        }
5096129203Scognet        else {
5097129203Scognet            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5098129203Scognet        }
5099129203Scognet        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5100129203Scognet        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5101129203Scognet    }
5102129203Scognet    else {
5103129203Scognet        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5104129203Scognet        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5105129203Scognet    }
5106129203Scognet    do {
5107129203Scognet        alternateASig0 = aSig0;
5108129203Scognet        alternateASig1 = aSig1;
5109129203Scognet        ++q;
5110129203Scognet        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5111129203Scognet    } while ( 0 <= (sbits64) aSig0 );
5112129203Scognet    add128(
5113129203Scognet        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
5114129203Scognet    if (    ( sigMean0 < 0 )
5115129203Scognet         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5116129203Scognet        aSig0 = alternateASig0;
5117129203Scognet        aSig1 = alternateASig1;
5118129203Scognet    }
5119129203Scognet    zSign = ( (sbits64) aSig0 < 0 );
5120129203Scognet    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5121129203Scognet    return
5122129203Scognet        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
5123129203Scognet
5124129203Scognet}
5125129203Scognet
5126129203Scognet/*
5127129203Scognet-------------------------------------------------------------------------------
5128129203ScognetReturns the square root of the quadruple-precision floating-point value `a'.
5129129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
5130129203ScognetFloating-Point Arithmetic.
5131129203Scognet-------------------------------------------------------------------------------
5132129203Scognet*/
5133129203Scognetfloat128 float128_sqrt( float128 a )
5134129203Scognet{
5135129203Scognet    flag aSign;
5136129203Scognet    int32 aExp, zExp;
5137129203Scognet    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5138129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5139129203Scognet    float128 z;
5140129203Scognet
5141129203Scognet    aSig1 = extractFloat128Frac1( a );
5142129203Scognet    aSig0 = extractFloat128Frac0( a );
5143129203Scognet    aExp = extractFloat128Exp( a );
5144129203Scognet    aSign = extractFloat128Sign( a );
5145129203Scognet    if ( aExp == 0x7FFF ) {
5146129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
5147129203Scognet        if ( ! aSign ) return a;
5148129203Scognet        goto invalid;
5149129203Scognet    }
5150129203Scognet    if ( aSign ) {
5151129203Scognet        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5152129203Scognet invalid:
5153129203Scognet        float_raise( float_flag_invalid );
5154129203Scognet        z.low = float128_default_nan_low;
5155129203Scognet        z.high = float128_default_nan_high;
5156129203Scognet        return z;
5157129203Scognet    }
5158129203Scognet    if ( aExp == 0 ) {
5159129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5160129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5161129203Scognet    }
5162129203Scognet    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5163129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
5164129203Scognet    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5165129203Scognet    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5166129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5167129203Scognet    doubleZSig0 = zSig0<<1;
5168129203Scognet    mul64To128( zSig0, zSig0, &term0, &term1 );
5169129203Scognet    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5170129203Scognet    while ( (sbits64) rem0 < 0 ) {
5171129203Scognet        --zSig0;
5172129203Scognet        doubleZSig0 -= 2;
5173129203Scognet        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5174129203Scognet    }
5175129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5176129203Scognet    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5177129203Scognet        if ( zSig1 == 0 ) zSig1 = 1;
5178129203Scognet        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5179129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5180129203Scognet        mul64To128( zSig1, zSig1, &term2, &term3 );
5181129203Scognet        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5182129203Scognet        while ( (sbits64) rem1 < 0 ) {
5183129203Scognet            --zSig1;
5184129203Scognet            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5185129203Scognet            term3 |= 1;
5186129203Scognet            term2 |= doubleZSig0;
5187129203Scognet            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5188129203Scognet        }
5189129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5190129203Scognet    }
5191129203Scognet    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5192129203Scognet    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
5193129203Scognet
5194129203Scognet}
5195129203Scognet
5196129203Scognet/*
5197129203Scognet-------------------------------------------------------------------------------
5198129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to
5199129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
5200129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5201129203Scognet-------------------------------------------------------------------------------
5202129203Scognet*/
5203129203Scognetflag float128_eq( float128 a, float128 b )
5204129203Scognet{
5205129203Scognet
5206129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5207129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5208129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5209129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5210129203Scognet       ) {
5211129203Scognet        if (    float128_is_signaling_nan( a )
5212129203Scognet             || float128_is_signaling_nan( b ) ) {
5213129203Scognet            float_raise( float_flag_invalid );
5214129203Scognet        }
5215129203Scognet        return 0;
5216129203Scognet    }
5217129203Scognet    return
5218129203Scognet           ( a.low == b.low )
5219129203Scognet        && (    ( a.high == b.high )
5220129203Scognet             || (    ( a.low == 0 )
5221129203Scognet                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5222129203Scognet           );
5223129203Scognet
5224129203Scognet}
5225129203Scognet
5226129203Scognet/*
5227129203Scognet-------------------------------------------------------------------------------
5228129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5229129203Scognetor equal to the corresponding value `b', and 0 otherwise.  The comparison
5230129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
5231129203ScognetArithmetic.
5232129203Scognet-------------------------------------------------------------------------------
5233129203Scognet*/
5234129203Scognetflag float128_le( float128 a, float128 b )
5235129203Scognet{
5236129203Scognet    flag aSign, bSign;
5237129203Scognet
5238129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5239129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5240129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5241129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5242129203Scognet       ) {
5243129203Scognet        float_raise( float_flag_invalid );
5244129203Scognet        return 0;
5245129203Scognet    }
5246129203Scognet    aSign = extractFloat128Sign( a );
5247129203Scognet    bSign = extractFloat128Sign( b );
5248129203Scognet    if ( aSign != bSign ) {
5249129203Scognet        return
5250129203Scognet               aSign
5251129203Scognet            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5252129203Scognet                 == 0 );
5253129203Scognet    }
5254129203Scognet    return
5255129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
5256129203Scognet        : le128( a.high, a.low, b.high, b.low );
5257129203Scognet
5258129203Scognet}
5259129203Scognet
5260129203Scognet/*
5261129203Scognet-------------------------------------------------------------------------------
5262129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5263129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
5264129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5265129203Scognet-------------------------------------------------------------------------------
5266129203Scognet*/
5267129203Scognetflag float128_lt( float128 a, float128 b )
5268129203Scognet{
5269129203Scognet    flag aSign, bSign;
5270129203Scognet
5271129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5272129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5273129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5274129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5275129203Scognet       ) {
5276129203Scognet        float_raise( float_flag_invalid );
5277129203Scognet        return 0;
5278129203Scognet    }
5279129203Scognet    aSign = extractFloat128Sign( a );
5280129203Scognet    bSign = extractFloat128Sign( b );
5281129203Scognet    if ( aSign != bSign ) {
5282129203Scognet        return
5283129203Scognet               aSign
5284129203Scognet            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5285129203Scognet                 != 0 );
5286129203Scognet    }
5287129203Scognet    return
5288129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
5289129203Scognet        : lt128( a.high, a.low, b.high, b.low );
5290129203Scognet
5291129203Scognet}
5292129203Scognet
5293129203Scognet/*
5294129203Scognet-------------------------------------------------------------------------------
5295129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to
5296129203Scognetthe corresponding value `b', and 0 otherwise.  The invalid exception is
5297129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
5298129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5299129203Scognet-------------------------------------------------------------------------------
5300129203Scognet*/
5301129203Scognetflag float128_eq_signaling( float128 a, float128 b )
5302129203Scognet{
5303129203Scognet
5304129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5305129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5306129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5307129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5308129203Scognet       ) {
5309129203Scognet        float_raise( float_flag_invalid );
5310129203Scognet        return 0;
5311129203Scognet    }
5312129203Scognet    return
5313129203Scognet           ( a.low == b.low )
5314129203Scognet        && (    ( a.high == b.high )
5315129203Scognet             || (    ( a.low == 0 )
5316129203Scognet                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5317129203Scognet           );
5318129203Scognet
5319129203Scognet}
5320129203Scognet
5321129203Scognet/*
5322129203Scognet-------------------------------------------------------------------------------
5323129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5324129203Scognetor equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5325129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
5326129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
5327129203Scognet-------------------------------------------------------------------------------
5328129203Scognet*/
5329129203Scognetflag float128_le_quiet( float128 a, float128 b )
5330129203Scognet{
5331129203Scognet    flag aSign, bSign;
5332129203Scognet
5333129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5334129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5335129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5336129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5337129203Scognet       ) {
5338129203Scognet        if (    float128_is_signaling_nan( a )
5339129203Scognet             || float128_is_signaling_nan( b ) ) {
5340129203Scognet            float_raise( float_flag_invalid );
5341129203Scognet        }
5342129203Scognet        return 0;
5343129203Scognet    }
5344129203Scognet    aSign = extractFloat128Sign( a );
5345129203Scognet    bSign = extractFloat128Sign( b );
5346129203Scognet    if ( aSign != bSign ) {
5347129203Scognet        return
5348129203Scognet               aSign
5349129203Scognet            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5350129203Scognet                 == 0 );
5351129203Scognet    }
5352129203Scognet    return
5353129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
5354129203Scognet        : le128( a.high, a.low, b.high, b.low );
5355129203Scognet
5356129203Scognet}
5357129203Scognet
5358129203Scognet/*
5359129203Scognet-------------------------------------------------------------------------------
5360129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
5361129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5362129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
5363129203ScognetStandard for Binary Floating-Point Arithmetic.
5364129203Scognet-------------------------------------------------------------------------------
5365129203Scognet*/
5366129203Scognetflag float128_lt_quiet( float128 a, float128 b )
5367129203Scognet{
5368129203Scognet    flag aSign, bSign;
5369129203Scognet
5370129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5371129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5372129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5373129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5374129203Scognet       ) {
5375129203Scognet        if (    float128_is_signaling_nan( a )
5376129203Scognet             || float128_is_signaling_nan( b ) ) {
5377129203Scognet            float_raise( float_flag_invalid );
5378129203Scognet        }
5379129203Scognet        return 0;
5380129203Scognet    }
5381129203Scognet    aSign = extractFloat128Sign( a );
5382129203Scognet    bSign = extractFloat128Sign( b );
5383129203Scognet    if ( aSign != bSign ) {
5384129203Scognet        return
5385129203Scognet               aSign
5386129203Scognet            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5387129203Scognet                 != 0 );
5388129203Scognet    }
5389129203Scognet    return
5390129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
5391129203Scognet        : lt128( a.high, a.low, b.high, b.low );
5392129203Scognet
5393129203Scognet}
5394129203Scognet
5395129203Scognet#endif
5396129203Scognet
5397129203Scognet
5398129203Scognet#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5399129203Scognet
5400129203Scognet/*
5401129203Scognet * These two routines are not part of the original softfloat distribution.
5402129203Scognet *
5403129203Scognet * They are based on the corresponding conversions to integer but return
5404129203Scognet * unsigned numbers instead since these functions are required by GCC.
5405129203Scognet *
5406129203Scognet * Added by Mark Brinicombe <mark@NetBSD.org>	27/09/97
5407129203Scognet *
5408129203Scognet * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
5409129203Scognet */
5410129203Scognet
5411129203Scognet/*
5412129203Scognet-------------------------------------------------------------------------------
5413129203ScognetReturns the result of converting the double-precision floating-point value
5414129203Scognet`a' to the 32-bit unsigned integer format.  The conversion is
5415129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point
5416129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
5417129203Scognet`a' is a NaN, the largest positive integer is returned.  If the conversion
5418129203Scognetoverflows, the largest integer positive is returned.
5419129203Scognet-------------------------------------------------------------------------------
5420129203Scognet*/
5421129203Scognetuint32 float64_to_uint32_round_to_zero( float64 a )
5422129203Scognet{
5423129203Scognet    flag aSign;
5424129203Scognet    int16 aExp, shiftCount;
5425129203Scognet    bits64 aSig, savedASig;
5426129203Scognet    uint32 z;
5427129203Scognet
5428129203Scognet    aSig = extractFloat64Frac( a );
5429129203Scognet    aExp = extractFloat64Exp( a );
5430129203Scognet    aSign = extractFloat64Sign( a );
5431129203Scognet
5432129203Scognet    if (aSign) {
5433129203Scognet        float_raise( float_flag_invalid );
5434129203Scognet    	return(0);
5435129203Scognet    }
5436129203Scognet
5437129203Scognet    if ( 0x41E < aExp ) {
5438129203Scognet        float_raise( float_flag_invalid );
5439129203Scognet        return 0xffffffff;
5440129203Scognet    }
5441129203Scognet    else if ( aExp < 0x3FF ) {
5442129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
5443129203Scognet        return 0;
5444129203Scognet    }
5445129203Scognet    aSig |= LIT64( 0x0010000000000000 );
5446129203Scognet    shiftCount = 0x433 - aExp;
5447129203Scognet    savedASig = aSig;
5448129203Scognet    aSig >>= shiftCount;
5449129203Scognet    z = aSig;
5450129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
5451129203Scognet        float_exception_flags |= float_flag_inexact;
5452129203Scognet    }
5453129203Scognet    return z;
5454129203Scognet
5455129203Scognet}
5456129203Scognet
5457129203Scognet/*
5458129203Scognet-------------------------------------------------------------------------------
5459129203ScognetReturns the result of converting the single-precision floating-point value
5460129203Scognet`a' to the 32-bit unsigned integer format.  The conversion is
5461129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point
5462129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
5463129203Scognet`a' is a NaN, the largest positive integer is returned.  If the conversion
5464129203Scognetoverflows, the largest positive integer is returned.
5465129203Scognet-------------------------------------------------------------------------------
5466129203Scognet*/
5467129203Scognetuint32 float32_to_uint32_round_to_zero( float32 a )
5468129203Scognet{
5469129203Scognet    flag aSign;
5470129203Scognet    int16 aExp, shiftCount;
5471129203Scognet    bits32 aSig;
5472129203Scognet    uint32 z;
5473129203Scognet
5474129203Scognet    aSig = extractFloat32Frac( a );
5475129203Scognet    aExp = extractFloat32Exp( a );
5476129203Scognet    aSign = extractFloat32Sign( a );
5477129203Scognet    shiftCount = aExp - 0x9E;
5478129203Scognet
5479129203Scognet    if (aSign) {
5480129203Scognet        float_raise( float_flag_invalid );
5481129203Scognet    	return(0);
5482129203Scognet    }
5483129203Scognet    if ( 0 < shiftCount ) {
5484129203Scognet        float_raise( float_flag_invalid );
5485129203Scognet        return 0xFFFFFFFF;
5486129203Scognet    }
5487129203Scognet    else if ( aExp <= 0x7E ) {
5488129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
5489129203Scognet        return 0;
5490129203Scognet    }
5491129203Scognet    aSig = ( aSig | 0x800000 )<<8;
5492129203Scognet    z = aSig>>( - shiftCount );
5493129203Scognet    if ( aSig<<( shiftCount & 31 ) ) {
5494129203Scognet        float_exception_flags |= float_flag_inexact;
5495129203Scognet    }
5496129203Scognet    return z;
5497129203Scognet
5498129203Scognet}
5499129203Scognet
5500129203Scognet#endif
5501