softfloat/bits64/softfloat.c

230363Sdas/* $NetBSD: softfloat.c,v 1.8 2011/07/10 04:52:23 matt Exp $ */
129203Scognet
129203Scognet/*
129203Scognet * This version hacked for use with gcc -msoft-float by bjh21.
129203Scognet * (Mostly a case of #ifdefing out things GCC doesn't need or provides
129203Scognet *  itself).
129203Scognet */
129203Scognet
129203Scognet/*
129203Scognet * Things you may want to define:
129203Scognet *
129203Scognet * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
129203Scognet *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
129203Scognet *   properly renamed.
129203Scognet */
129203Scognet
129203Scognet/*
129203Scognet===============================================================================
129203Scognet
129203ScognetThis C source file is part of the SoftFloat IEC/IEEE Floating-point
129203ScognetArithmetic Package, Release 2a.
129203Scognet
129203ScognetWritten by John R. Hauser.  This work was made possible in part by the
129203ScognetInternational Computer Science Institute, located at Suite 600, 1947 Center
129203ScognetStreet, Berkeley, California 94704.  Funding was partially provided by the
129203ScognetNational Science Foundation under grant MIP-9311980.  The original version
129203Scognetof this code was written as part of a project to build a fixed-point vector
129203Scognetprocessor in collaboration with the University of California at Berkeley,
129203Scognetoverseen by Profs. Nelson Morgan and John Wawrzynek.  More information
129203Scognetis available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
129203Scognetarithmetic/SoftFloat.html'.
129203Scognet
129203ScognetTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
129203Scognethas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
129203ScognetTIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
129203ScognetPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
129203ScognetAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
129203Scognet
129203ScognetDerivative works are acceptable, even for commercial purposes, so long as
129203Scognet(1) they include prominent notice that the work is derivative, and (2) they
129203Scognetinclude prominent notice akin to these four paragraphs for those parts of
129203Scognetthis code that are retained.
129203Scognet
129203Scognet===============================================================================
129203Scognet*/
129203Scognet
129203Scognet#include <sys/cdefs.h>
129203Scognet__FBSDID("$FreeBSD$");
129203Scognet
129203Scognet#ifdef SOFTFLOAT_FOR_GCC
129203Scognet#include "softfloat-for-gcc.h"
129203Scognet#endif
129203Scognet
129203Scognet#include "milieu.h"
129203Scognet#include "softfloat.h"
129203Scognet
/*
 * Hooks for converting a float64 between the form in which it is stored in
 * memory and the form SoftFloat works on.  Each hook defaults to the
 * identity mapping unless it has already been defined before this point.
 */
#if !defined(FLOAT64_DEMANGLE)
#define FLOAT64_DEMANGLE(a)	(a)
#endif
#if !defined(FLOAT64_MANGLE)
#define FLOAT64_MANGLE(a)	(a)
#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetFloating-point rounding mode, extended double-precision rounding precision,
129203Scognetand exception flags.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
230189Sdasint float_rounding_mode = float_round_nearest_even;
230189Sdasint float_exception_flags = 0;
129203Scognet#ifdef FLOATX80
129203Scognetint8 floatx80_rounding_precision = 80;
129203Scognet#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetPrimitive arithmetic functions, including multi-word arithmetic, and
129203Scognetdivision and square root approximations.  (Can be specialized to target if
129203Scognetdesired.)
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognet#include "softfloat-macros"
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetFunctions and definitions to determine:  (1) whether tininess for underflow
129203Scognetis detected before or after rounding by default, (2) what (if anything)
129203Scognethappens when exceptions are raised, (3) how signaling NaNs are distinguished
129203Scognetfrom quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
129203Scognetare propagated from function inputs to output.  These details are target-
129203Scognetspecific.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognet#include "softfloat-specialize"
129203Scognet
129203Scognet#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes a 64-bit fixed-point value `absZ' with binary point between bits 6
129203Scognetand 7, and returns the properly rounded 32-bit integer corresponding to the
129203Scognetinput.  If `zSign' is 1, the input is negated before being converted to an
129203Scognetinteger.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
129203Scognetis simply rounded to an integer, with the inexact exception raised if the
129203Scognetinput cannot be represented exactly as an integer.  However, if the fixed-
129203Scognetpoint input is too large, the invalid exception is raised and the largest
129203Scognetpositive or negative integer is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic int32 roundAndPackInt32( flag zSign, bits64 absZ )
129203Scognet{
129203Scognet    int8 roundingMode;
129203Scognet    flag roundNearestEven;
129203Scognet    int8 roundIncrement, roundBits;
129203Scognet    int32 z;
129203Scognet
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
129203Scognet    roundIncrement = 0x40;
129203Scognet    if ( ! roundNearestEven ) {
129203Scognet        if ( roundingMode == float_round_to_zero ) {
129203Scognet            roundIncrement = 0;
129203Scognet        }
129203Scognet        else {
129203Scognet            roundIncrement = 0x7F;
129203Scognet            if ( zSign ) {
129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
129203Scognet            }
129203Scognet            else {
129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    roundBits = absZ & 0x7F;
129203Scognet    absZ = ( absZ + roundIncrement )>>7;
129203Scognet    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
129203Scognet    z = absZ;
129203Scognet    if ( zSign ) z = - z;
129203Scognet    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
129203Scognet    }
129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes the 128-bit fixed-point value formed by concatenating `absZ0' and
129203Scognet`absZ1', with binary point between bits 63 and 64 (between the input words),
129203Scognetand returns the properly rounded 64-bit integer corresponding to the input.
129203ScognetIf `zSign' is 1, the input is negated before being converted to an integer.
129203ScognetOrdinarily, the fixed-point input is simply rounded to an integer, with
129203Scognetthe inexact exception raised if the input cannot be represented exactly as
129203Scognetan integer.  However, if the fixed-point input is too large, the invalid
129203Scognetexception is raised and the largest positive or negative integer is
129203Scognetreturned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
129203Scognet{
129203Scognet    int8 roundingMode;
129203Scognet    flag roundNearestEven, increment;
129203Scognet    int64 z;
129203Scognet
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
129203Scognet    increment = ( (sbits64) absZ1 < 0 );
129203Scognet    if ( ! roundNearestEven ) {
129203Scognet        if ( roundingMode == float_round_to_zero ) {
129203Scognet            increment = 0;
129203Scognet        }
129203Scognet        else {
129203Scognet            if ( zSign ) {
129203Scognet                increment = ( roundingMode == float_round_down ) && absZ1;
129203Scognet            }
129203Scognet            else {
129203Scognet                increment = ( roundingMode == float_round_up ) && absZ1;
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    if ( increment ) {
129203Scognet        ++absZ0;
129203Scognet        if ( absZ0 == 0 ) goto overflow;
129203Scognet        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
129203Scognet    }
129203Scognet    z = absZ0;
129203Scognet    if ( zSign ) z = - z;
129203Scognet    if ( z && ( ( z < 0 ) ^ zSign ) ) {
129203Scognet overflow:
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return
129203Scognet              zSign ? (sbits64) LIT64( 0x8000000000000000 )
129203Scognet            : LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet    }
129203Scognet    if ( absZ1 ) float_exception_flags |= float_flag_inexact;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the fraction bits of the single-precision floating-point value `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE bits32 extractFloat32Frac( float32 a )
129203Scognet{
129203Scognet
129203Scognet    return a & 0x007FFFFF;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the exponent bits of the single-precision floating-point value `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE int16 extractFloat32Exp( float32 a )
129203Scognet{
129203Scognet
129203Scognet    return ( a>>23 ) & 0xFF;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the sign bit of the single-precision floating-point value `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE flag extractFloat32Sign( float32 a )
129203Scognet{
129203Scognet
129203Scognet    return a>>31;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetNormalizes the subnormal single-precision floating-point value represented
129203Scognetby the denormalized significand `aSig'.  The normalized exponent and
129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and
129203Scognet`zSigPtr', respectively.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic void
129203Scognet normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
129203Scognet{
129203Scognet    int8 shiftCount;
129203Scognet
129203Scognet    shiftCount = countLeadingZeros32( aSig ) - 8;
129203Scognet    *zSigPtr = aSig<<shiftCount;
129203Scognet    *zExpPtr = 1 - shiftCount;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
129203Scognetsingle-precision floating-point value, returning the result.  After being
129203Scognetshifted into the proper positions, the three fields are simply added
129203Scognettogether to form the result.  This means that any integer portion of `zSig'
129203Scognetwill be added into the exponent.  Since a properly normalized significand
129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less
129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized
129203Scognetsignificand.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
129203Scognet{
129203Scognet
129203Scognet    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
129203Scognetand significand `zSig', and returns the proper single-precision floating-
129203Scognetpoint value corresponding to the abstract input.  Ordinarily, the abstract
129203Scognetvalue is simply rounded and packed into the single-precision format, with
129203Scognetthe inexact exception raised if the abstract input cannot be represented
129203Scognetexactly.  However, if the abstract value is too large, the overflow and
129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
129203Scognetthe abstract input cannot be represented exactly as a subnormal single-
129203Scognetprecision floating-point number.
129203Scognet    The input significand `zSig' has its binary point between bits 30
129203Scognetand 29, which is 7 bits to the left of the usual location.  This shifted
129203Scognetsignificand must be normalized or smaller.  If `zSig' is not normalized,
129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number,
129203Scognetand it must not require rounding.  In the usual case that `zSig' is
129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for
129203ScognetBinary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
129203Scognet{
129203Scognet    int8 roundingMode;
129203Scognet    flag roundNearestEven;
129203Scognet    int8 roundIncrement, roundBits;
129203Scognet    flag isTiny;
129203Scognet
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
129203Scognet    roundIncrement = 0x40;
129203Scognet    if ( ! roundNearestEven ) {
129203Scognet        if ( roundingMode == float_round_to_zero ) {
129203Scognet            roundIncrement = 0;
129203Scognet        }
129203Scognet        else {
129203Scognet            roundIncrement = 0x7F;
129203Scognet            if ( zSign ) {
129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
129203Scognet            }
129203Scognet            else {
129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    roundBits = zSig & 0x7F;
129203Scognet    if ( 0xFD <= (bits16) zExp ) {
129203Scognet        if (    ( 0xFD < zExp )
129203Scognet             || (    ( zExp == 0xFD )
129203Scognet                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
129203Scognet           ) {
129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
129203Scognet            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
129203Scognet        }
129203Scognet        if ( zExp < 0 ) {
129203Scognet            isTiny =
129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
129203Scognet                || ( zExp < -1 )
129203Scognet                || ( zSig + roundIncrement < 0x80000000 );
129203Scognet            shift32RightJamming( zSig, - zExp, &zSig );
129203Scognet            zExp = 0;
129203Scognet            roundBits = zSig & 0x7F;
129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
129203Scognet        }
129203Scognet    }
129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
129203Scognet    zSig = ( zSig + roundIncrement )>>7;
129203Scognet    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
129203Scognet    if ( zSig == 0 ) zExp = 0;
129203Scognet    return packFloat32( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
129203Scognetand significand `zSig', and returns the proper single-precision floating-
129203Scognetpoint value corresponding to the abstract input.  This routine is just like
129203Scognet`roundAndPackFloat32' except that `zSig' does not have to be normalized.
129203ScognetBit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
129203Scognetfloating-point exponent.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float32
129203Scognet normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
129203Scognet{
129203Scognet    int8 shiftCount;
129203Scognet
129203Scognet    shiftCount = countLeadingZeros32( zSig ) - 1;
129203Scognet    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the fraction bits of the double-precision floating-point value `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE bits64 extractFloat64Frac( float64 a )
129203Scognet{
129203Scognet
129203Scognet    return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the exponent bits of the double-precision floating-point value `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE int16 extractFloat64Exp( float64 a )
129203Scognet{
129203Scognet
129203Scognet    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the sign bit of the double-precision floating-point value `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE flag extractFloat64Sign( float64 a )
129203Scognet{
129203Scognet
129203Scognet    return FLOAT64_DEMANGLE(a)>>63;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetNormalizes the subnormal double-precision floating-point value represented
129203Scognetby the denormalized significand `aSig'.  The normalized exponent and
129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and
129203Scognet`zSigPtr', respectively.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic void
129203Scognet normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
129203Scognet{
129203Scognet    int8 shiftCount;
129203Scognet
129203Scognet    shiftCount = countLeadingZeros64( aSig ) - 11;
129203Scognet    *zSigPtr = aSig<<shiftCount;
129203Scognet    *zExpPtr = 1 - shiftCount;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
129203Scognetdouble-precision floating-point value, returning the result.  After being
129203Scognetshifted into the proper positions, the three fields are simply added
129203Scognettogether to form the result.  This means that any integer portion of `zSig'
129203Scognetwill be added into the exponent.  Since a properly normalized significand
129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less
129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized
129203Scognetsignificand.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
129203Scognet{
129203Scognet
129203Scognet    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
129203Scognet			   ( ( (bits64) zExp )<<52 ) + zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
129203Scognetand significand `zSig', and returns the proper double-precision floating-
129203Scognetpoint value corresponding to the abstract input.  Ordinarily, the abstract
129203Scognetvalue is simply rounded and packed into the double-precision format, with
129203Scognetthe inexact exception raised if the abstract input cannot be represented
129203Scognetexactly.  However, if the abstract value is too large, the overflow and
129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
129203Scognetthe abstract input cannot be represented exactly as a subnormal double-
129203Scognetprecision floating-point number.
129203Scognet    The input significand `zSig' has its binary point between bits 62
129203Scognetand 61, which is 10 bits to the left of the usual location.  This shifted
129203Scognetsignificand must be normalized or smaller.  If `zSig' is not normalized,
129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number,
129203Scognetand it must not require rounding.  In the usual case that `zSig' is
129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for
129203ScognetBinary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
129203Scognet{
129203Scognet    int8 roundingMode;
129203Scognet    flag roundNearestEven;
129203Scognet    int16 roundIncrement, roundBits;
129203Scognet    flag isTiny;
129203Scognet
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
129203Scognet    roundIncrement = 0x200;
129203Scognet    if ( ! roundNearestEven ) {
129203Scognet        if ( roundingMode == float_round_to_zero ) {
129203Scognet            roundIncrement = 0;
129203Scognet        }
129203Scognet        else {
129203Scognet            roundIncrement = 0x3FF;
129203Scognet            if ( zSign ) {
129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
129203Scognet            }
129203Scognet            else {
129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    roundBits = zSig & 0x3FF;
129203Scognet    if ( 0x7FD <= (bits16) zExp ) {
129203Scognet        if (    ( 0x7FD < zExp )
129203Scognet             || (    ( zExp == 0x7FD )
129203Scognet                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
129203Scognet           ) {
129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
129203Scognet            return FLOAT64_MANGLE(
129203Scognet		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
129203Scognet		( roundIncrement == 0 ));
129203Scognet        }
129203Scognet        if ( zExp < 0 ) {
129203Scognet            isTiny =
129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
129203Scognet                || ( zExp < -1 )
129203Scognet                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
129203Scognet            shift64RightJamming( zSig, - zExp, &zSig );
129203Scognet            zExp = 0;
129203Scognet            roundBits = zSig & 0x3FF;
129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
129203Scognet        }
129203Scognet    }
129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
129203Scognet    zSig = ( zSig + roundIncrement )>>10;
129203Scognet    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
129203Scognet    if ( zSig == 0 ) zExp = 0;
129203Scognet    return packFloat64( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
129203Scognetand significand `zSig', and returns the proper double-precision floating-
129203Scognetpoint value corresponding to the abstract input.  This routine is just like
129203Scognet`roundAndPackFloat64' except that `zSig' does not have to be normalized.
129203ScognetBit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
129203Scognetfloating-point exponent.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float64
129203Scognet normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
129203Scognet{
129203Scognet    int8 shiftCount;
129203Scognet
129203Scognet    shiftCount = countLeadingZeros64( zSig ) - 1;
129203Scognet    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifdef FLOATX80
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the fraction bits of the extended double-precision floating-point
129203Scognetvalue `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE bits64 extractFloatx80Frac( floatx80 a )
129203Scognet{
129203Scognet
129203Scognet    return a.low;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the exponent bits of the extended double-precision floating-point
129203Scognetvalue `a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE int32 extractFloatx80Exp( floatx80 a )
129203Scognet{
129203Scognet
129203Scognet    return a.high & 0x7FFF;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the sign bit of the extended double-precision floating-point value
129203Scognet`a'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE flag extractFloatx80Sign( floatx80 a )
129203Scognet{
129203Scognet
129203Scognet    return a.high>>15;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetNormalizes the subnormal extended double-precision floating-point value
129203Scognetrepresented by the denormalized significand `aSig'.  The normalized exponent
129203Scognetand significand are stored at the locations pointed to by `zExpPtr' and
129203Scognet`zSigPtr', respectively.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic void
129203Scognet normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
129203Scognet{
129203Scognet    int8 shiftCount;
129203Scognet
129203Scognet    shiftCount = countLeadingZeros64( aSig );
129203Scognet    *zSigPtr = aSig<<shiftCount;
129203Scognet    *zExpPtr = 1 - shiftCount;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into an
129203Scognetextended double-precision floating-point value, returning the result.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
129203Scognet{
129203Scognet    floatx80 z;
129203Scognet
129203Scognet    z.low = zSig;
129203Scognet    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
129203Scognetand extended significand formed by the concatenation of `zSig0' and `zSig1',
129203Scognetand returns the proper extended double-precision floating-point value
129203Scognetcorresponding to the abstract input.  Ordinarily, the abstract value is
129203Scognetrounded and packed into the extended double-precision format, with the
129203Scognetinexact exception raised if the abstract input cannot be represented
129203Scognetexactly.  However, if the abstract value is too large, the overflow and
129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
129203Scognetthe abstract input cannot be represented exactly as a subnormal extended
129203Scognetdouble-precision floating-point number.
129203Scognet    If `roundingPrecision' is 32 or 64, the result is rounded to the same
129203Scognetnumber of bits as single or double precision, respectively.  Otherwise, the
129203Scognetresult is rounded to the full precision of the extended double-precision
129203Scognetformat.
129203Scognet    The input significand must be normalized or smaller.  If the input
129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result
129203Scognetreturned is a subnormal number, and it must not require rounding.  The
129203Scognethandling of underflow and overflow follows the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet    Implementation notes:  the rounding mode is read from the global
129203Scognet`float_rounding_mode', and exceptions are reported through `float_raise'
129203Scognetand by OR-ing bits into `float_exception_flags'.  The function has two
129203Scognetpaths: a reduced-precision path (32 or 64) that rounds inside `zSig0' using
129203Scogneta mask, and a full-precision path (label `precision80') that rounds `zSig0'
129203Scognetaccording to `zSig1'.  Both paths share the `overflow' label; the
129203Scognetreduced-precision path jumps to it with `roundMask' still holding its
129203Scognetprecision mask, while the full-precision path first sets `roundMask' to 0.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic floatx80
129203Scognet roundAndPackFloatx80(
129203Scognet     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
129203Scognet )
129203Scognet{
129203Scognet    int8 roundingMode;
129203Scognet    flag roundNearestEven, increment, isTiny;
129203Scognet    int64 roundIncrement, roundMask, roundBits;
129203Scognet
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
129203Scognet    if ( roundingPrecision == 80 ) goto precision80;
129203Scognet    if ( roundingPrecision == 64 ) {  /* low 11 bits of zSig0 are rounded off */
129203Scognet        roundIncrement = LIT64( 0x0000000000000400 );
129203Scognet        roundMask = LIT64( 0x00000000000007FF );
129203Scognet    }
129203Scognet    else if ( roundingPrecision == 32 ) {  /* low 40 bits are rounded off */
129203Scognet        roundIncrement = LIT64( 0x0000008000000000 );
129203Scognet        roundMask = LIT64( 0x000000FFFFFFFFFF );
129203Scognet    }
129203Scognet    else {
129203Scognet        goto precision80;  /* any other value means full precision */
129203Scognet    }
129203Scognet    zSig0 |= ( zSig1 != 0 );  /* fold a nonzero zSig1 into bit 0 of zSig0 */
129203Scognet    if ( ! roundNearestEven ) {
129203Scognet        if ( roundingMode == float_round_to_zero ) {
129203Scognet            roundIncrement = 0;
129203Scognet        }
129203Scognet        else {
129203Scognet            roundIncrement = roundMask;  /* directed: bump unless exact */
129203Scognet            if ( zSign ) {
129203Scognet                if ( roundingMode == float_round_up ) roundIncrement = 0;
129203Scognet            }
129203Scognet            else {
129203Scognet                if ( roundingMode == float_round_down ) roundIncrement = 0;
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    roundBits = zSig0 & roundMask;
129203Scognet    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* zExp >= 0x7FFE, or zExp <= 0 if bits32 is unsigned */
129203Scognet        if (    ( 0x7FFE < zExp )
129203Scognet             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
129203Scognet           ) {
129203Scognet            goto overflow;  /* roundMask keeps the precision mask here */
129203Scognet        }
129203Scognet        if ( zExp <= 0 ) {
129203Scognet            isTiny =
129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
129203Scognet                || ( zExp < 0 )
129203Scognet                || ( zSig0 <= zSig0 + roundIncrement );
129203Scognet            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
129203Scognet            zExp = 0;
129203Scognet            roundBits = zSig0 & roundMask;  /* recomputed after the shift */
129203Scognet            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
129203Scognet            if ( roundBits ) float_exception_flags |= float_flag_inexact;
129203Scognet            zSig0 += roundIncrement;
129203Scognet            if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* top bit set after rounding */
129203Scognet            roundIncrement = roundMask + 1;
129203Scognet            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
129203Scognet                roundMask |= roundIncrement;  /* exact tie: also clear the lowest kept bit */
129203Scognet            }
129203Scognet            zSig0 &= ~ roundMask;
129203Scognet            return packFloatx80( zSign, zExp, zSig0 );
129203Scognet        }
129203Scognet    }
129203Scognet    if ( roundBits ) float_exception_flags |= float_flag_inexact;
129203Scognet    zSig0 += roundIncrement;
129203Scognet    if ( zSig0 < roundIncrement ) {  /* the addition wrapped around */
129203Scognet        ++zExp;
129203Scognet        zSig0 = LIT64( 0x8000000000000000 );
129203Scognet    }
129203Scognet    roundIncrement = roundMask + 1;
129203Scognet    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
129203Scognet        roundMask |= roundIncrement;  /* exact tie: also clear the lowest kept bit */
129203Scognet    }
129203Scognet    zSig0 &= ~ roundMask;
129203Scognet    if ( zSig0 == 0 ) zExp = 0;
129203Scognet    return packFloatx80( zSign, zExp, zSig0 );
129203Scognet precision80:
129203Scognet    increment = ( (sbits64) zSig1 < 0 );  /* nearest: top bit of zSig1 set */
129203Scognet    if ( ! roundNearestEven ) {
129203Scognet        if ( roundingMode == float_round_to_zero ) {
129203Scognet            increment = 0;
129203Scognet        }
129203Scognet        else {
129203Scognet            if ( zSign ) {
129203Scognet                increment = ( roundingMode == float_round_down ) && zSig1;
129203Scognet            }
129203Scognet            else {
129203Scognet                increment = ( roundingMode == float_round_up ) && zSig1;
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {  /* zExp >= 0x7FFE, or zExp <= 0 if bits32 is unsigned */
129203Scognet        if (    ( 0x7FFE < zExp )
129203Scognet             || (    ( zExp == 0x7FFE )
129203Scognet                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
129203Scognet                  && increment
129203Scognet                )
129203Scognet           ) {
129203Scognet            roundMask = 0;  /* so ~roundMask below is an all-ones significand */
129203Scognet overflow:
129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
129203Scognet            if (    ( roundingMode == float_round_to_zero )
129203Scognet                 || ( zSign && ( roundingMode == float_round_up ) )
129203Scognet                 || ( ! zSign && ( roundingMode == float_round_down ) )
129203Scognet               ) {
129203Scognet                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
129203Scognet            }
129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet        }
129203Scognet        if ( zExp <= 0 ) {
129203Scognet            isTiny =
129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
129203Scognet                || ( zExp < 0 )
129203Scognet                || ! increment
129203Scognet                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
129203Scognet            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
129203Scognet            zExp = 0;
129203Scognet            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
129203Scognet            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
129203Scognet            if ( roundNearestEven ) {  /* redo the decision on the shifted value */
129203Scognet                increment = ( (sbits64) zSig1 < 0 );
129203Scognet            }
129203Scognet            else {
129203Scognet                if ( zSign ) {
129203Scognet                    increment = ( roundingMode == float_round_down ) && zSig1;
129203Scognet                }
129203Scognet                else {
129203Scognet                    increment = ( roundingMode == float_round_up ) && zSig1;
129203Scognet                }
129203Scognet            }
129203Scognet            if ( increment ) {
129203Scognet                ++zSig0;
129203Scognet                zSig0 &=
129203Scognet                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
129203Scognet                if ( (sbits64) zSig0 < 0 ) zExp = 1;  /* top bit set after rounding */
129203Scognet            }
129203Scognet            return packFloatx80( zSign, zExp, zSig0 );
129203Scognet        }
129203Scognet    }
129203Scognet    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
129203Scognet    if ( increment ) {
129203Scognet        ++zSig0;
129203Scognet        if ( zSig0 == 0 ) {  /* significand wrapped: carry into the exponent */
129203Scognet            ++zExp;
129203Scognet            zSig0 = LIT64( 0x8000000000000000 );
129203Scognet        }
129203Scognet        else {
129203Scognet            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );  /* tie: clear bit 0 */
129203Scognet        }
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( zSig0 == 0 ) zExp = 0;
129203Scognet    }
129203Scognet    return packFloatx80( zSign, zExp, zSig0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetVariant of `roundAndPackFloatx80' for a significand (`zSig0' concatenated
129203Scognetwith `zSig1') that need not be normalized on entry.  The significand is
129203Scognetshifted left until the leading one of `zSig0' reaches the top bit, `zExp'
129203Scognetis reduced by the same amount, and the result is handed to
129203Scognet`roundAndPackFloatx80' together with `roundingPrecision' and `zSign'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic floatx80
129203Scognet normalizeRoundAndPackFloatx80(
129203Scognet     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
129203Scognet )
129203Scognet{
129203Scognet    int8 leadingZeros;
129203Scognet
129203Scognet    /* An empty high word is replaced by the low word (a 64-bit shift). */
129203Scognet    if ( ! zSig0 ) {
129203Scognet        zExp -= 64;
129203Scognet        zSig0 = zSig1;
129203Scognet        zSig1 = 0;
129203Scognet    }
129203Scognet    leadingZeros = countLeadingZeros64( zSig0 );
129203Scognet    zExp -= leadingZeros;
129203Scognet    shortShift128Left( zSig0, zSig1, leadingZeros, &zSig0, &zSig1 );
129203Scognet    return
129203Scognet        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#ifdef FLOAT128
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetYields the low word of the quadruple-precision floating-point value `a',
129203Scogneti.e. the least-significant 64 bits of its fraction.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE bits64 extractFloat128Frac1( float128 a )
129203Scognet{
129203Scognet    bits64 lowFraction;
129203Scognet
129203Scognet    lowFraction = a.low;
129203Scognet    return lowFraction;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetYields the most-significant 48 fraction bits of the quadruple-precision
129203Scognetfloating-point value `a', taken from the low 48 bits of its high word.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE bits64 extractFloat128Frac0( float128 a )
129203Scognet{
129203Scognet    bits64 fractionMask;
129203Scognet
129203Scognet    fractionMask = LIT64( 0x0000FFFFFFFFFFFF );
129203Scognet    return fractionMask & a.high;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetYields the exponent field of the quadruple-precision floating-point value
129203Scognet`a': the 15 bits of the high word that start at bit 48.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE int32 extractFloat128Exp( float128 a )
129203Scognet{
129203Scognet
129203Scognet    return (int32) ( 0x7FFF & ( a.high>>48 ) );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetYields the sign of the quadruple-precision floating-point value `a', which
129203Scognetis bit 63 of its high word.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE flag extractFloat128Sign( float128 a )
129203Scognet{
129203Scognet
129203Scognet    return (flag) ( a.high>>63 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetNormalizes a subnormal quadruple-precision significand given as the
129203Scognetconcatenation of `aSig0' and `aSig1'.  The significand is shifted so that
129203Scognetits leading one lands 15 bits below the top of the high word.  The high
129203Scognetpart of the shifted significand is written through `zSig0Ptr', the low 64
129203Scognetbits through `zSig1Ptr', and the matching exponent through `zExpPtr'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic void
129203Scognet normalizeFloat128Subnormal(
129203Scognet     bits64 aSig0,
129203Scognet     bits64 aSig1,
129203Scognet     int32 *zExpPtr,
129203Scognet     bits64 *zSig0Ptr,
129203Scognet     bits64 *zSig1Ptr
129203Scognet )
129203Scognet{
129203Scognet    int8 shift;
129203Scognet
129203Scognet    if ( aSig0 != 0 ) {
129203Scognet        /* Leading one is in the high word: a short left shift suffices. */
129203Scognet        shift = countLeadingZeros64( aSig0 ) - 15;
129203Scognet        shortShift128Left( aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr );
129203Scognet        *zExpPtr = 1 - shift;
129203Scognet        return;
129203Scognet    }
129203Scognet    /* High word is empty: the low word supplies every significant bit. */
129203Scognet    shift = countLeadingZeros64( aSig1 ) - 15;
129203Scognet    if ( 0 <= shift ) {
129203Scognet        *zSig0Ptr = aSig1<<shift;
129203Scognet        *zSig1Ptr = 0;
129203Scognet    }
129203Scognet    else {
129203Scognet        /* Fewer than 15 leading zeros: split the low word over both outputs. */
129203Scognet        *zSig0Ptr = aSig1>>( - shift );
129203Scognet        *zSig1Ptr = aSig1<<( shift & 63 );
129203Scognet    }
129203Scognet    *zExpPtr = - shift - 63;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetBuilds a quadruple-precision floating-point value from the sign `zSign',
129203Scognetthe exponent `zExp', and the significand formed by the concatenation of
129203Scognet`zSig0' and `zSig1', and returns it.  `zSig1' becomes the low word as is.
129203ScognetThe high 64-bit word is the plain sum of `zSign' shifted to bit 63, `zExp'
129203Scognetshifted to bit 48, and `zSig0'.  Because the fields are added rather than
129203Scognetmasked together, any integer portion of `zSig0' carries into the exponent.
129203ScognetA properly normalized significand has an integer portion equal to 1, so
129203Scognet`zExp' should be 1 less than the desired result exponent whenever `zSig0'
129203Scognetand `zSig1' concatenated form a complete, normalized significand.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203ScognetINLINE float128
129203Scognet packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
129203Scognet{
129203Scognet    float128 packed;
129203Scognet    bits64 signField, expField;
129203Scognet
129203Scognet    signField = ( (bits64) zSign )<<63;
129203Scognet    expField = ( (bits64) zExp )<<48;
129203Scognet    packed.high = signField + expField + zSig0;
129203Scognet    packed.low = zSig1;
129203Scognet    return packed;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp',
129203Scognetand extended significand formed by the concatenation of `zSig0', `zSig1',
129203Scognetand `zSig2', and returns the proper quadruple-precision floating-point value
129203Scognetcorresponding to the abstract input.  Ordinarily, the abstract value is
129203Scognetsimply rounded and packed into the quadruple-precision format, with the
129203Scognetinexact exception raised if the abstract input cannot be represented
129203Scognetexactly.  However, if the abstract value is too large, the overflow and
129203Scognetinexact exceptions are raised and an infinity or maximal finite value is
129203Scognetreturned.  If the abstract value is too small, the input value is rounded to
129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if
129203Scognetthe abstract input cannot be represented exactly as a subnormal quadruple-
129203Scognetprecision floating-point number.
129203Scognet    The input significand must be normalized or smaller.  If the input
129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result
129203Scognetreturned is a subnormal number, and it must not require rounding.  In the
129203Scognetusual case that the input significand is normalized, `zExp' must be 1 less
129203Scognetthan the ``true'' floating-point exponent.  The handling of underflow and
129203Scognetoverflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet    Implementation notes:  the rounding mode is read from the global
129203Scognet`float_rounding_mode', and exceptions are reported through `float_raise'
129203Scognetand by OR-ing bits into `float_exception_flags'.  `zSig2' holds the bits
129203Scognetbelow the result significand and alone decides rounding and inexactness.
129203ScognetThe underflow branch does not return; after shifting the significand right
129203Scognetand recomputing `increment' it falls through to the common rounding code
129203Scognetwith `zExp' set to 0.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float128
129203Scognet roundAndPackFloat128(
129203Scognet     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
129203Scognet{
129203Scognet    int8 roundingMode;
129203Scognet    flag roundNearestEven, increment, isTiny;
129203Scognet
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    roundNearestEven = ( roundingMode == float_round_nearest_even );
129203Scognet    increment = ( (sbits64) zSig2 < 0 );  /* nearest: top bit of zSig2 set */
129203Scognet    if ( ! roundNearestEven ) {
129203Scognet        if ( roundingMode == float_round_to_zero ) {
129203Scognet            increment = 0;
129203Scognet        }
129203Scognet        else {
129203Scognet            if ( zSign ) {
129203Scognet                increment = ( roundingMode == float_round_down ) && zSig2;
129203Scognet            }
129203Scognet            else {
129203Scognet                increment = ( roundingMode == float_round_up ) && zSig2;
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    if ( 0x7FFD <= (bits32) zExp ) {  /* zExp >= 0x7FFD, or zExp < 0 if bits32 is unsigned */
129203Scognet        if (    ( 0x7FFD < zExp )
129203Scognet             || (    ( zExp == 0x7FFD )
129203Scognet                  && eq128(
129203Scognet                         LIT64( 0x0001FFFFFFFFFFFF ),
129203Scognet                         LIT64( 0xFFFFFFFFFFFFFFFF ),
129203Scognet                         zSig0,
129203Scognet                         zSig1
129203Scognet                     )
129203Scognet                  && increment
129203Scognet                )
129203Scognet           ) {
129203Scognet            float_raise( float_flag_overflow | float_flag_inexact );
129203Scognet            if (    ( roundingMode == float_round_to_zero )
129203Scognet                 || ( zSign && ( roundingMode == float_round_up ) )
129203Scognet                 || ( ! zSign && ( roundingMode == float_round_down ) )
129203Scognet               ) {
129203Scognet                return
129203Scognet                    packFloat128(
129203Scognet                        zSign,
129203Scognet                        0x7FFE,
129203Scognet                        LIT64( 0x0000FFFFFFFFFFFF ),
129203Scognet                        LIT64( 0xFFFFFFFFFFFFFFFF )
129203Scognet                    );
129203Scognet            }
129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
129203Scognet        }
129203Scognet        if ( zExp < 0 ) {
129203Scognet            isTiny =
129203Scognet                   ( float_detect_tininess == float_tininess_before_rounding )
129203Scognet                || ( zExp < -1 )
129203Scognet                || ! increment
129203Scognet                || lt128(
129203Scognet                       zSig0,
129203Scognet                       zSig1,
129203Scognet                       LIT64( 0x0001FFFFFFFFFFFF ),
129203Scognet                       LIT64( 0xFFFFFFFFFFFFFFFF )
129203Scognet                   );
129203Scognet            shift128ExtraRightJamming(
129203Scognet                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
129203Scognet            zExp = 0;
129203Scognet            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
129203Scognet            if ( roundNearestEven ) {  /* redo the decision on the shifted value */
129203Scognet                increment = ( (sbits64) zSig2 < 0 );
129203Scognet            }
129203Scognet            else {
129203Scognet                if ( zSign ) {
129203Scognet                    increment = ( roundingMode == float_round_down ) && zSig2;
129203Scognet                }
129203Scognet                else {
129203Scognet                    increment = ( roundingMode == float_round_up ) && zSig2;
129203Scognet                }
129203Scognet            }
129203Scognet        }
129203Scognet    }
129203Scognet    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
129203Scognet    if ( increment ) {
129203Scognet        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
129203Scognet        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );  /* tie: clear bit 0 */
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
129203Scognet    }
129203Scognet    return packFloat128( zSign, zExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetVariant of `roundAndPackFloat128' for a two-word significand (`zSig0'
129203Scognetconcatenated with `zSig1') that need not be normalized on entry.  The
129203Scognetsignificand is shifted so that its leading one sits 15 bits below the top
129203Scognetof `zSig0'; a right shift moves the displaced bits into a third word.
129203Scognet`zExp' is adjusted by the shift and everything is passed on to
129203Scognet`roundAndPackFloat128'.  In all cases, `zExp' must be 1 less than the
129203Scognet``true'' floating-point exponent.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float128
129203Scognet normalizeRoundAndPackFloat128(
129203Scognet     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
129203Scognet{
129203Scognet    int8 shift;
129203Scognet    bits64 zSig2;
129203Scognet
129203Scognet    /* An empty high word is replaced by the low word (a 64-bit shift). */
129203Scognet    if ( ! zSig0 ) {
129203Scognet        zExp -= 64;
129203Scognet        zSig0 = zSig1;
129203Scognet        zSig1 = 0;
129203Scognet    }
129203Scognet    shift = countLeadingZeros64( zSig0 ) - 15;
129203Scognet    if ( shift < 0 ) {
129203Scognet        /* Too many significant bits: shift right, spilling into zSig2. */
129203Scognet        shift128ExtraRightJamming(
129203Scognet            zSig0, zSig1, 0, - shift, &zSig0, &zSig1, &zSig2 );
129203Scognet    }
129203Scognet    else {
129203Scognet        shortShift128Left( zSig0, zSig1, shift, &zSig0, &zSig1 );
129203Scognet        zSig2 = 0;
129203Scognet    }
129203Scognet    zExp -= shift;
129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetConverts the 32-bit two's complement integer `a' to the single-precision
129203Scognetfloating-point format and returns the result.  Zero and the most negative
129203Scognetinteger are handled directly; every other input is passed as sign and
129203Scognetmagnitude to `normalizeRoundAndPackFloat32'.  The conversion is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 int32_to_float32( int32 a )
129203Scognet{
129203Scognet    flag negative;
129203Scognet    int32 magnitude;
129203Scognet
129203Scognet    if ( ! a ) return 0;
129203Scognet    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
129203Scognet    negative = ( a < 0 );
129203Scognet    if ( negative ) {
129203Scognet        magnitude = - a;
129203Scognet    }
129203Scognet    else {
129203Scognet        magnitude = a;
129203Scognet    }
129203Scognet    return normalizeRoundAndPackFloat32( negative, 0x9C, magnitude );
129203Scognet
129203Scognet}
129203Scognet
230380Sdas#ifndef SOFTFLOAT_FOR_GCC /* __floatunsisf is in libgcc */
230363Sdas/*
230363Sdas-------------------------------------------------------------------------------
230363SdasReturns the result of converting the 32-bit unsigned integer `a' to the
230363Sdassingle-precision floating-point format.  Zero is returned directly.  A
230363Sdasvalue with bit 31 set is first halved, with the exponent argument raised by
230363Sdasone to compensate; the bit shifted out is OR-ed back into bit 0 so that it
230363Sdasis not lost.  Without that, the rounding step (defined elsewhere in this
230363Sdasfile) could mistake an inexact value for an exact one or for an exact tie.
230363Sdas-------------------------------------------------------------------------------
230363Sdas*/
230363Sdasfloat32 uint32_to_float32( uint32 a )
230363Sdas{
230363Sdas    if ( a == 0 ) return 0;
230363Sdas    if ( a & (bits32) 0x80000000 ) {
230363Sdas        /* Keep the discarded low bit as a sticky bit. */
230363Sdas        return
230363Sdas            normalizeRoundAndPackFloat32( 0, 0x9D, ( a >> 1 ) | ( a & 1 ) );
230363Sdas    }
230363Sdas    return normalizeRoundAndPackFloat32( 0, 0x9C, a );
230363Sdas}
230380Sdas#endif
230363Sdas
230363Sdas
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
129203Scognetto the double-precision floating-point format.  The conversion is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet    The magnitude of `a' is shifted left until its leading one reaches
129203Scognetbit 52 and is then packed together with the matching exponent; no rounding
129203Scognetstep is involved.  The magnitude is computed in unsigned arithmetic so that
129203Scognetnegating the most negative integer does not overflow a signed type.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 int32_to_float64( int32 a )
129203Scognet{
129203Scognet    flag zSign;
129203Scognet    uint32 absA;
129203Scognet    int8 shiftCount;
129203Scognet    bits64 zSig;
129203Scognet
129203Scognet    if ( a == 0 ) return 0;
129203Scognet    zSign = ( a < 0 );
129203Scognet    /* Unsigned negation is well defined for every input. */
129203Scognet    absA = zSign ? 0 - (uint32) a : (uint32) a;
129203Scognet    shiftCount = countLeadingZeros32( absA ) + 21;
129203Scognet    zSig = absA;
129203Scognet    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
129203Scognet
129203Scognet}
129203Scognet
230380Sdas#ifndef SOFTFLOAT_FOR_GCC /* __floatunsidf is in libgcc */
230363Sdas/*
230363Sdas-------------------------------------------------------------------------------
230363SdasConverts the 32-bit unsigned integer `a' to the double-precision
230363Sdasfloating-point format and returns the result.  Zero is returned directly;
230363Sdasotherwise the value is widened to 64 bits, shifted left by its count of
230363Sdasleading zeros plus 21, and packed with the matching exponent.
230363Sdas-------------------------------------------------------------------------------
230363Sdas*/
230363Sdasfloat64 uint32_to_float64( uint32 a )
230363Sdas{
230363Sdas    bits64 widened;
230363Sdas    int8 shift;
230363Sdas
230363Sdas    if ( ! a ) return 0;
230363Sdas    shift = countLeadingZeros32( a ) + 21;
230363Sdas    widened = a;
230363Sdas    widened <<= shift;
230363Sdas    return packFloat64( 0, 0x432 - shift, widened );
230363Sdas
230363Sdas}
230380Sdas#endif
230363Sdas
129203Scognet#ifdef FLOATX80
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the 32-bit two's complement integer `a'
129203Scognetto the extended double-precision floating-point format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet    The magnitude of `a' is shifted left until its leading one reaches
129203Scognetbit 63 and is then packed together with the matching exponent; no rounding
129203Scognetstep is involved.  The magnitude is computed in unsigned arithmetic so that
129203Scognetnegating the most negative integer does not overflow a signed type.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 int32_to_floatx80( int32 a )
129203Scognet{
129203Scognet    flag zSign;
129203Scognet    uint32 absA;
129203Scognet    int8 shiftCount;
129203Scognet    bits64 zSig;
129203Scognet
129203Scognet    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
129203Scognet    zSign = ( a < 0 );
129203Scognet    /* Unsigned negation is well defined for every input. */
129203Scognet    absA = zSign ? 0 - (uint32) a : (uint32) a;
129203Scognet    shiftCount = countLeadingZeros32( absA ) + 32;
129203Scognet    zSig = absA;
129203Scognet    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
129203Scognet
129203Scognet}
129203Scognet
230363Sdas/*
230363Sdas-------------------------------------------------------------------------------
230363SdasConverts the 32-bit unsigned integer `a' to the extended double-precision
230363Sdasfloating-point format and returns the result.  Zero yields an all-zero
230363Sdasvalue; otherwise the value is widened to 64 bits, shifted left by its count
230363Sdasof leading zeros plus 32, and packed with the matching exponent.
230363Sdas-------------------------------------------------------------------------------
230363Sdas*/
230363Sdasfloatx80 uint32_to_floatx80( uint32 a )
230363Sdas{
230363Sdas    bits64 widened;
230363Sdas    int8 shift;
230363Sdas
230363Sdas    if ( ! a ) return packFloatx80( 0, 0, 0 );
230363Sdas    shift = countLeadingZeros32( a ) + 32;
230363Sdas    widened = a;
230363Sdas    widened <<= shift;
230363Sdas    return packFloatx80( 0, 0x403E - shift, widened );
230363Sdas
230363Sdas}
230363Sdas
129203Scognet#endif
129203Scognet
129203Scognet#ifdef FLOAT128
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the 32-bit two's complement integer `a' to
129203Scognetthe quadruple-precision floating-point format.  The conversion is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet    The magnitude of `a' is shifted left until its leading one reaches
129203Scognetbit 48 of the high word and is then packed together with the matching
129203Scognetexponent and a zero low word; no rounding step is involved.  The magnitude
129203Scognetis computed in unsigned arithmetic so that negating the most negative
129203Scognetinteger does not overflow a signed type.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 int32_to_float128( int32 a )
129203Scognet{
129203Scognet    flag zSign;
129203Scognet    uint32 absA;
129203Scognet    int8 shiftCount;
129203Scognet    bits64 zSig0;
129203Scognet
129203Scognet    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
129203Scognet    zSign = ( a < 0 );
129203Scognet    /* Unsigned negation is well defined for every input. */
129203Scognet    absA = zSign ? 0 - (uint32) a : (uint32) a;
129203Scognet    shiftCount = countLeadingZeros32( absA ) + 17;
129203Scognet    zSig0 = absA;
129203Scognet    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
129203Scognet
129203Scognet}
129203Scognet
230363Sdas/*
230363Sdas-------------------------------------------------------------------------------
230363SdasConverts the 32-bit unsigned integer `a' to the quadruple-precision
230363Sdasfloating-point format and returns the result.  Zero yields an all-zero
230363Sdasvalue; otherwise the value is widened to 64 bits, shifted left by its count
230363Sdasof leading zeros plus 17, and packed as the high significand word with the
230363Sdasmatching exponent and a zero low word.
230363Sdas-------------------------------------------------------------------------------
230363Sdas*/
230363Sdasfloat128 uint32_to_float128( uint32 a )
230363Sdas{
230363Sdas    bits64 widened;
230363Sdas    int8 shift;
230363Sdas
230363Sdas    if ( ! a ) return packFloat128( 0, 0, 0, 0 );
230363Sdas    shift = countLeadingZeros32( a ) + 17;
230363Sdas    widened = a;
230363Sdas    widened <<= shift;
230363Sdas    return packFloat128( 0, 0x402E - shift, widened, 0 );
230363Sdas
230363Sdas}
230363Sdas
129203Scognet#endif
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
129203Scognetto the single-precision floating-point format.  The conversion is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet    A magnitude with at least 40 leading zeros is shifted into place and
129203Scognetpacked directly.  Larger magnitudes are aligned for `roundAndPackFloat32',
129203Scognetusing a jamming right shift when bits have to be dropped.  The magnitude is
129203Scognetcomputed in unsigned arithmetic so that negating the most negative integer
129203Scognetdoes not overflow a signed type.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 int64_to_float32( int64 a )
129203Scognet{
129203Scognet    flag zSign;
129203Scognet    uint64 absA;
129203Scognet    int8 shiftCount;
129203Scognet
129203Scognet    if ( a == 0 ) return 0;
129203Scognet    zSign = ( a < 0 );
129203Scognet    /* Unsigned negation is well defined for every input. */
129203Scognet    absA = zSign ? 0 - (uint64) a : (uint64) a;
129203Scognet    shiftCount = countLeadingZeros64( absA ) - 40;
129203Scognet    if ( 0 <= shiftCount ) {
129203Scognet        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
129203Scognet    }
129203Scognet    else {
129203Scognet        shiftCount += 7;
129203Scognet        if ( shiftCount < 0 ) {
129203Scognet            shift64RightJamming( absA, - shiftCount, &absA );
129203Scognet        }
129203Scognet        else {
129203Scognet            absA <<= shiftCount;
129203Scognet        }
129203Scognet        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetConverts the 64-bit two's complement integer `a' to the double-precision
129203Scognetfloating-point format and returns the result.  Zero and the most negative
129203Scognetinteger are handled directly; every other input is passed as sign and
129203Scognetmagnitude to `normalizeRoundAndPackFloat64'.  The conversion is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 int64_to_float64( int64 a )
129203Scognet{
129203Scognet    flag negative;
129203Scognet    int64 magnitude;
129203Scognet
129203Scognet    if ( ! a ) return 0;
129203Scognet    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
129203Scognet        return packFloat64( 1, 0x43E, 0 );
129203Scognet    }
129203Scognet    negative = ( a < 0 );
129203Scognet    if ( negative ) {
129203Scognet        magnitude = - a;
129203Scognet    }
129203Scognet    else {
129203Scognet        magnitude = a;
129203Scognet    }
129203Scognet    return normalizeRoundAndPackFloat64( negative, 0x43C, magnitude );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifdef FLOATX80
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the 64-bit two's complement integer `a'
129203Scognetto the extended double-precision floating-point format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet    The magnitude of `a' is shifted left until its leading one reaches
129203Scognetbit 63 and is then packed together with the matching exponent; no rounding
129203Scognetstep is involved.  The magnitude is computed in unsigned arithmetic so that
129203Scognetnegating the most negative integer does not overflow a signed type.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 int64_to_floatx80( int64 a )
129203Scognet{
129203Scognet    flag zSign;
129203Scognet    uint64 absA;
129203Scognet    int8 shiftCount;
129203Scognet
129203Scognet    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
129203Scognet    zSign = ( a < 0 );
129203Scognet    /* Unsigned negation is well defined for every input. */
129203Scognet    absA = zSign ? 0 - (uint64) a : (uint64) a;
129203Scognet    shiftCount = countLeadingZeros64( absA );
129203Scognet    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet#ifdef FLOAT128
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the 64-bit two's complement integer `a' to
129203Scognetthe quadruple-precision floating-point format.  The conversion is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 int64_to_float128( int64 a )
129203Scognet{
129203Scognet    flag zSign;
129203Scognet    uint64 absA;
129203Scognet    int8 shiftCount;
129203Scognet    int32 zExp;
129203Scognet    bits64 zSig0, zSig1;
129203Scognet
129203Scognet    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
129203Scognet    zSign = ( a < 0 );
129203Scognet    absA = zSign ? - a : a;
129203Scognet    shiftCount = countLeadingZeros64( absA ) + 49;
129203Scognet    zExp = 0x406E - shiftCount;
129203Scognet    if ( 64 <= shiftCount ) {
129203Scognet        zSig1 = 0;
129203Scognet        zSig0 = absA;
129203Scognet        shiftCount -= 64;
129203Scognet    }
129203Scognet    else {
129203Scognet        zSig1 = absA;
129203Scognet        zSig0 = 0;
129203Scognet    }
129203Scognet    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
129203Scognet    return packFloat128( zSign, zExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic---which means in particular that the conversion is rounded
129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
129203Scognetlargest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 float32_to_int32( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits32 aSig;
129203Scognet    bits64 aSig64;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
129203Scognet    if ( aExp ) aSig |= 0x00800000;
129203Scognet    shiftCount = 0xAF - aExp;
129203Scognet    aSig64 = aSig;
129203Scognet    aSig64 <<= 32;
129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
129203Scognet    return roundAndPackInt32( aSign, aSig64 );
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.
129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
129203Scognetreturned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 float32_to_int32_round_to_zero( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits32 aSig;
129203Scognet    int32 z;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    shiftCount = aExp - 0x9E;
129203Scognet    if ( 0 <= shiftCount ) {
129203Scognet        if ( a != 0xCF000000 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
129203Scognet        }
129203Scognet        return (sbits32) 0x80000000;
129203Scognet    }
129203Scognet    else if ( aExp <= 0x7E ) {
129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSig = ( aSig | 0x00800000 )<<8;
129203Scognet    z = aSig>>( - shiftCount );
129203Scognet    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    if ( aSign ) z = - z;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic---which means in particular that the conversion is rounded
129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
129203Scognetlargest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 float32_to_int64( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits32 aSig;
129203Scognet    bits64 aSig64, aSigExtra;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    shiftCount = 0xBE - aExp;
129203Scognet    if ( shiftCount < 0 ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
129203Scognet            return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet        }
129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet    }
129203Scognet    if ( aExp ) aSig |= 0x00800000;
129203Scognet    aSig64 = aSig;
129203Scognet    aSig64 <<= 40;
129203Scognet    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
129203Scognet    return roundAndPackInt64( aSign, aSig64, aSigExtra );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
129203Scognet`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
129203Scognetconversion overflows, the largest integer with the same sign as `a' is
129203Scognetreturned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 float32_to_int64_round_to_zero( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits32 aSig;
129203Scognet    bits64 aSig64;
129203Scognet    int64 z;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    shiftCount = aExp - 0xBE;
129203Scognet    if ( 0 <= shiftCount ) {
129203Scognet        if ( a != 0xDF000000 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet            }
129203Scognet        }
129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet    }
129203Scognet    else if ( aExp <= 0x7E ) {
129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSig64 = aSig | 0x00800000;
129203Scognet    aSig64 <<= 40;
129203Scognet    z = aSig64>>( - shiftCount );
129203Scognet    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    if ( aSign ) z = - z;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the double-precision floating-point format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float32_to_float64( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits32 aSig;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
129203Scognet        --aExp;
129203Scognet    }
129203Scognet    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifdef FLOATX80
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the extended double-precision floating-point format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 float32_to_floatx80( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits32 aSig;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    aSig |= 0x00800000;
129203Scognet    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#ifdef FLOAT128
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the double-precision floating-point format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float32_to_float128( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits32 aSig;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
129203Scognet        return packFloat128( aSign, 0x7FFF, 0, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
129203Scognet        --aExp;
129203Scognet    }
129203Scognet    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetRounds the single-precision floating-point value `a' to an integer, and
129203Scognetreturns the result as a single-precision floating-point value.  The
129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float32_round_to_int( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits32 lastBitMask, roundBitsMask;
129203Scognet    int8 roundingMode;
129203Scognet    float32 z;
129203Scognet
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    if ( 0x96 <= aExp ) {
129203Scognet        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
129203Scognet            return propagateFloat32NaN( a, a );
129203Scognet        }
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( aExp <= 0x7E ) {
129203Scognet        if ( (bits32) ( a<<1 ) == 0 ) return a;
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet        aSign = extractFloat32Sign( a );
129203Scognet        switch ( float_rounding_mode ) {
129203Scognet         case float_round_nearest_even:
129203Scognet            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
129203Scognet                return packFloat32( aSign, 0x7F, 0 );
129203Scognet            }
129203Scognet            break;
129203Scognet	 case float_round_to_zero:
129203Scognet	    break;
129203Scognet         case float_round_down:
129203Scognet            return aSign ? 0xBF800000 : 0;
129203Scognet         case float_round_up:
129203Scognet            return aSign ? 0x80000000 : 0x3F800000;
129203Scognet        }
129203Scognet        return packFloat32( aSign, 0, 0 );
129203Scognet    }
129203Scognet    lastBitMask = 1;
129203Scognet    lastBitMask <<= 0x96 - aExp;
129203Scognet    roundBitsMask = lastBitMask - 1;
129203Scognet    z = a;
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    if ( roundingMode == float_round_nearest_even ) {
129203Scognet        z += lastBitMask>>1;
129203Scognet        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
129203Scognet    }
129203Scognet    else if ( roundingMode != float_round_to_zero ) {
129203Scognet        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
129203Scognet            z += roundBitsMask;
129203Scognet        }
129203Scognet    }
129203Scognet    z &= ~ roundBitsMask;
129203Scognet    if ( z != a ) float_exception_flags |= float_flag_inexact;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the absolute values of the single-precision
129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
129203Scognet{
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits32 aSig, bSig, zSig;
129203Scognet    int16 expDiff;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    bSig = extractFloat32Frac( b );
129203Scognet    bExp = extractFloat32Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    aSig <<= 6;
129203Scognet    bSig <<= 6;
129203Scognet    if ( 0 < expDiff ) {
129203Scognet        if ( aExp == 0xFF ) {
129203Scognet            if ( aSig ) return propagateFloat32NaN( a, b );
129203Scognet            return a;
129203Scognet        }
129203Scognet        if ( bExp == 0 ) {
129203Scognet            --expDiff;
129203Scognet        }
129203Scognet        else {
129203Scognet            bSig |= 0x20000000;
129203Scognet        }
129203Scognet        shift32RightJamming( bSig, expDiff, &bSig );
129203Scognet        zExp = aExp;
129203Scognet    }
129203Scognet    else if ( expDiff < 0 ) {
129203Scognet        if ( bExp == 0xFF ) {
129203Scognet            if ( bSig ) return propagateFloat32NaN( a, b );
129203Scognet            return packFloat32( zSign, 0xFF, 0 );
129203Scognet        }
129203Scognet        if ( aExp == 0 ) {
129203Scognet            ++expDiff;
129203Scognet        }
129203Scognet        else {
129203Scognet            aSig |= 0x20000000;
129203Scognet        }
129203Scognet        shift32RightJamming( aSig, - expDiff, &aSig );
129203Scognet        zExp = bExp;
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( aExp == 0xFF ) {
129203Scognet            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
129203Scognet            return a;
129203Scognet        }
129203Scognet        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
129203Scognet        zSig = 0x40000000 + aSig + bSig;
129203Scognet        zExp = aExp;
129203Scognet        goto roundAndPack;
129203Scognet    }
129203Scognet    aSig |= 0x20000000;
129203Scognet    zSig = ( aSig + bSig )<<1;
129203Scognet    --zExp;
129203Scognet    if ( (sbits32) zSig < 0 ) {
129203Scognet        zSig = aSig + bSig;
129203Scognet        ++zExp;
129203Scognet    }
129203Scognet roundAndPack:
129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the absolute values of the single-
129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
129203Scognet{
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits32 aSig, bSig, zSig;
129203Scognet    int16 expDiff;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    bSig = extractFloat32Frac( b );
129203Scognet    bExp = extractFloat32Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    aSig <<= 7;
129203Scognet    bSig <<= 7;
129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float32_default_nan;
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        aExp = 1;
129203Scognet        bExp = 1;
129203Scognet    }
129203Scognet    if ( bSig < aSig ) goto aBigger;
129203Scognet    if ( aSig < bSig ) goto bBigger;
129203Scognet    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
129203Scognet bExpBigger:
129203Scognet    if ( bExp == 0xFF ) {
129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
129203Scognet        return packFloat32( zSign ^ 1, 0xFF, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        ++expDiff;
129203Scognet    }
129203Scognet    else {
129203Scognet        aSig |= 0x40000000;
129203Scognet    }
129203Scognet    shift32RightJamming( aSig, - expDiff, &aSig );
129203Scognet    bSig |= 0x40000000;
129203Scognet bBigger:
129203Scognet    zSig = bSig - aSig;
129203Scognet    zExp = bExp;
129203Scognet    zSign ^= 1;
129203Scognet    goto normalizeRoundAndPack;
129203Scognet aExpBigger:
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig ) return propagateFloat32NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        --expDiff;
129203Scognet    }
129203Scognet    else {
129203Scognet        bSig |= 0x40000000;
129203Scognet    }
129203Scognet    shift32RightJamming( bSig, expDiff, &bSig );
129203Scognet    aSig |= 0x40000000;
129203Scognet aBigger:
129203Scognet    zSig = aSig - bSig;
129203Scognet    zExp = aExp;
129203Scognet normalizeRoundAndPack:
129203Scognet    --zExp;
129203Scognet    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the single-precision floating-point values `a'
129203Scognetand `b'.  The operation is performed according to the IEC/IEEE Standard for
129203ScognetBinary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float32_add( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return addFloat32Sigs( a, b, aSign );
129203Scognet    }
129203Scognet    else {
129203Scognet        return subFloat32Sigs( a, b, aSign );
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the single-precision floating-point values
129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
129203Scognetfor Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float32_sub( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return subFloat32Sigs( a, b, aSign );
129203Scognet    }
129203Scognet    else {
129203Scognet        return addFloat32Sigs( a, b, aSign );
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of multiplying the single-precision floating-point values
129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
129203Scognetfor Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float32_mul( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits32 aSig, bSig;
129203Scognet    bits64 zSig64;
129203Scognet    bits32 zSig;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSig = extractFloat32Frac( b );
129203Scognet    bExp = extractFloat32Exp( b );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
129203Scognet            return propagateFloat32NaN( a, b );
129203Scognet        }
129203Scognet        if ( ( bExp | bSig ) == 0 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float32_default_nan;
129203Scognet        }
129203Scognet        return packFloat32( zSign, 0xFF, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0xFF ) {
129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
129203Scognet        if ( ( aExp | aSig ) == 0 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float32_default_nan;
129203Scognet        }
129203Scognet        return packFloat32( zSign, 0xFF, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    zExp = aExp + bExp - 0x7F;
129203Scognet    aSig = ( aSig | 0x00800000 )<<7;
129203Scognet    bSig = ( bSig | 0x00800000 )<<8;
129203Scognet    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
129203Scognet    zSig = zSig64;
129203Scognet    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
129203Scognet        zSig <<= 1;
129203Scognet        --zExp;
129203Scognet    }
129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of dividing the single-precision floating-point value `a'
129203Scognetby the corresponding value `b'.  The operation is performed according to the
129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float32_div( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits32 aSig, bSig, zSig;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSig = extractFloat32Frac( b );
129203Scognet    bExp = extractFloat32Exp( b );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig ) return propagateFloat32NaN( a, b );
129203Scognet        if ( bExp == 0xFF ) {
129203Scognet            if ( bSig ) return propagateFloat32NaN( a, b );
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float32_default_nan;
129203Scognet        }
129203Scognet        return packFloat32( zSign, 0xFF, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0xFF ) {
129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
129203Scognet        return packFloat32( zSign, 0, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) {
129203Scognet            if ( ( aExp | aSig ) == 0 ) {
129203Scognet                float_raise( float_flag_invalid );
129203Scognet                return float32_default_nan;
129203Scognet            }
129203Scognet            float_raise( float_flag_divbyzero );
129203Scognet            return packFloat32( zSign, 0xFF, 0 );
129203Scognet        }
129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    zExp = aExp - bExp + 0x7D;
129203Scognet    aSig = ( aSig | 0x00800000 )<<7;
129203Scognet    bSig = ( bSig | 0x00800000 )<<8;
129203Scognet    if ( bSig <= ( aSig + aSig ) ) {
129203Scognet        aSig >>= 1;
129203Scognet        ++zExp;
129203Scognet    }
129203Scognet    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
129203Scognet    if ( ( zSig & 0x3F ) == 0 ) {
129203Scognet        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
129203Scognet    }
129203Scognet    return roundAndPackFloat32( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the remainder of the single-precision floating-point value `a'
129203Scognetwith respect to the corresponding value `b'.  The operation is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float32_rem( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int16 aExp, bExp, expDiff;
129203Scognet    bits32 aSig, bSig;
129203Scognet    bits32 q;
129203Scognet    bits64 aSig64, bSig64, q64;
129203Scognet    bits32 alternateASig;
129203Scognet    sbits32 sigMean;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSig = extractFloat32Frac( b );
129203Scognet    bExp = extractFloat32Exp( b );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
129203Scognet            return propagateFloat32NaN( a, b );
129203Scognet        }
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float32_default_nan;
129203Scognet    }
129203Scognet    if ( bExp == 0xFF ) {
129203Scognet        if ( bSig ) return propagateFloat32NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float32_default_nan;
129203Scognet        }
129203Scognet        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return a;
129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    expDiff = aExp - bExp;
129203Scognet    aSig |= 0x00800000;
129203Scognet    bSig |= 0x00800000;
129203Scognet    if ( expDiff < 32 ) {
129203Scognet        aSig <<= 8;
129203Scognet        bSig <<= 8;
129203Scognet        if ( expDiff < 0 ) {
129203Scognet            if ( expDiff < -1 ) return a;
129203Scognet            aSig >>= 1;
129203Scognet        }
129203Scognet        q = ( bSig <= aSig );
129203Scognet        if ( q ) aSig -= bSig;
129203Scognet        if ( 0 < expDiff ) {
129203Scognet            q = ( ( (bits64) aSig )<<32 ) / bSig;
129203Scognet            q >>= 32 - expDiff;
129203Scognet            bSig >>= 2;
129203Scognet            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
129203Scognet        }
129203Scognet        else {
129203Scognet            aSig >>= 2;
129203Scognet            bSig >>= 2;
129203Scognet        }
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( bSig <= aSig ) aSig -= bSig;
129203Scognet        aSig64 = ( (bits64) aSig )<<40;
129203Scognet        bSig64 = ( (bits64) bSig )<<40;
129203Scognet        expDiff -= 64;
129203Scognet        while ( 0 < expDiff ) {
129203Scognet            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
129203Scognet            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
129203Scognet            aSig64 = - ( ( bSig * q64 )<<38 );
129203Scognet            expDiff -= 62;
129203Scognet        }
129203Scognet        expDiff += 64;
129203Scognet        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
129203Scognet        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
129203Scognet        q = q64>>( 64 - expDiff );
129203Scognet        bSig <<= 6;
129203Scognet        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
129203Scognet    }
129203Scognet    do {
129203Scognet        alternateASig = aSig;
129203Scognet        ++q;
129203Scognet        aSig -= bSig;
129203Scognet    } while ( 0 <= (sbits32) aSig );
129203Scognet    sigMean = aSig + alternateASig;
129203Scognet    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
129203Scognet        aSig = alternateASig;
129203Scognet    }
129203Scognet    zSign = ( (sbits32) aSig < 0 );
129203Scognet    if ( zSign ) aSig = - aSig;
129203Scognet    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the square root of the single-precision floating-point value `a'.
129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float32_sqrt( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, zExp;
129203Scognet    bits32 aSig, zSig;
129203Scognet    bits64 rem, term;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    if ( aExp == 0xFF ) {
129203Scognet        if ( aSig ) return propagateFloat32NaN( a, 0 );
129203Scognet        if ( ! aSign ) return a;
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float32_default_nan;
129203Scognet    }
129203Scognet    if ( aSign ) {
129203Scognet        if ( ( aExp | aSig ) == 0 ) return a;
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float32_default_nan;
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return 0;
129203Scognet        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
129203Scognet    aSig = ( aSig | 0x00800000 )<<8;
129203Scognet    zSig = estimateSqrt32( aExp, aSig ) + 2;
129203Scognet    if ( ( zSig & 0x7F ) <= 5 ) {
129203Scognet        if ( zSig < 2 ) {
129203Scognet            zSig = 0x7FFFFFFF;
129203Scognet            goto roundAndPack;
129203Scognet        }
129203Scognet        aSig >>= aExp & 1;
129203Scognet        term = ( (bits64) zSig ) * zSig;
129203Scognet        rem = ( ( (bits64) aSig )<<32 ) - term;
129203Scognet        while ( (sbits64) rem < 0 ) {
129203Scognet            --zSig;
129203Scognet            rem += ( ( (bits64) zSig )<<1 ) | 1;
129203Scognet        }
129203Scognet        zSig |= ( rem != 0 );
129203Scognet    }
129203Scognet    shift32RightJamming( zSig, 1, &zSig );
129203Scognet roundAndPack:
129203Scognet    return roundAndPackFloat32( 0, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to
129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float32_eq( float32 a, float32 b )
129203Scognet{
129203Scognet
129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
129203Scognet       ) {
129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
129203Scognetor equal to the corresponding value `b', and 0 otherwise.  The comparison
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float32_le( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float32_lt( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to
129203Scognetthe corresponding value `b', and 0 otherwise.  The invalid exception is
129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float32_eq_signaling( float32 a, float32 b )
129203Scognet{
129203Scognet
129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the single-precision floating-point value `a' is less than or
129203Scognetequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float32_le_quiet( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
129203Scognet       ) {
129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the single-precision floating-point value `a' is less than
129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float32_lt_quiet( float32 a, float32 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
129203Scognet         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
129203Scognet       ) {
129203Scognet        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    bSign = extractFloat32Sign( b );
129203Scognet    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic---which means in particular that the conversion is rounded
129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
129203Scognetlargest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 float64_to_int32( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits64 aSig;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
129203Scognet    shiftCount = 0x42C - aExp;
129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
129203Scognet    return roundAndPackInt32( aSign, aSig );
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the 32-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.
129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
129203Scognetreturned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 float64_to_int32_round_to_zero( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits64 aSig, savedASig;
129203Scognet    int32 z;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( 0x41E < aExp ) {
129203Scognet        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
129203Scognet        goto invalid;
129203Scognet    }
129203Scognet    else if ( aExp < 0x3FF ) {
129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSig |= LIT64( 0x0010000000000000 );
129203Scognet    shiftCount = 0x433 - aExp;
129203Scognet    savedASig = aSig;
129203Scognet    aSig >>= shiftCount;
129203Scognet    z = aSig;
129203Scognet    if ( aSign ) z = - z;
129203Scognet    if ( ( z < 0 ) ^ aSign ) {
129203Scognet invalid:
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
129203Scognet    }
129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic---which means in particular that the conversion is rounded
129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
129203Scognetlargest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 float64_to_int64( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits64 aSig, aSigExtra;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
129203Scognet    shiftCount = 0x433 - aExp;
129203Scognet    if ( shiftCount <= 0 ) {
129203Scognet        if ( 0x43E < aExp ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            if (    ! aSign
129203Scognet                 || (    ( aExp == 0x7FF )
129203Scognet                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
129203Scognet               ) {
129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet            }
129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet        }
129203Scognet        aSigExtra = 0;
129203Scognet        aSig <<= - shiftCount;
129203Scognet    }
129203Scognet    else {
129203Scognet        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
129203Scognet    }
129203Scognet    return roundAndPackInt64( aSign, aSig, aSigExtra );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the 64-bit two's complement integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.
129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
129203Scognetreturned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 float64_to_int64_round_to_zero( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits64 aSig;
129203Scognet    int64 z;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
129203Scognet    shiftCount = aExp - 0x433;
129203Scognet    if ( 0 <= shiftCount ) {
129203Scognet        if ( 0x43E <= aExp ) {
129203Scognet            if ( a != LIT64( 0xC3E0000000000000 ) ) {
129203Scognet                float_raise( float_flag_invalid );
129203Scognet                if (    ! aSign
129203Scognet                     || (    ( aExp == 0x7FF )
129203Scognet                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
129203Scognet                   ) {
129203Scognet                    return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet                }
129203Scognet            }
129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet        }
129203Scognet        z = aSig<<shiftCount;
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( aExp < 0x3FE ) {
129203Scognet            if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet            return 0;
129203Scognet        }
129203Scognet        z = aSig>>( - shiftCount );
129203Scognet        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
129203Scognet            float_exception_flags |= float_flag_inexact;
129203Scognet        }
129203Scognet    }
129203Scognet    if ( aSign ) z = - z;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the single-precision floating-point format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float64_to_float32( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits64 aSig;
129203Scognet    bits32 zSig;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
129203Scognet        return packFloat32( aSign, 0xFF, 0 );
129203Scognet    }
129203Scognet    shift64RightJamming( aSig, 22, &aSig );
129203Scognet    zSig = aSig;
129203Scognet    if ( aExp || zSig ) {
129203Scognet        zSig |= 0x40000000;
129203Scognet        aExp -= 0x381;
129203Scognet    }
129203Scognet    return roundAndPackFloat32( aSign, aExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifdef FLOATX80
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the extended double-precision floating-point format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 float64_to_floatx80( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits64 aSig;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    return
129203Scognet        packFloatx80(
129203Scognet            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#ifdef FLOAT128
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the quadruple-precision floating-point format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float64_to_float128( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits64 aSig, zSig0, zSig1;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
129203Scognet        return packFloat128( aSign, 0x7FFF, 0, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
129203Scognet        --aExp;
129203Scognet    }
129203Scognet    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
129203Scognet    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetRounds the double-precision floating-point value `a' to an integer, and
129203Scognetreturns the result as a double-precision floating-point value.  The
129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float64_round_to_int( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits64 lastBitMask, roundBitsMask;
129203Scognet    int8 roundingMode;
129203Scognet    float64 z;
129203Scognet
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    if ( 0x433 <= aExp ) {
129203Scognet        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
129203Scognet            return propagateFloat64NaN( a, a );
129203Scognet        }
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( aExp < 0x3FF ) {
129203Scognet        if ( (bits64) ( a<<1 ) == 0 ) return a;
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet        aSign = extractFloat64Sign( a );
129203Scognet        switch ( float_rounding_mode ) {
129203Scognet         case float_round_nearest_even:
129203Scognet            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
129203Scognet                return packFloat64( aSign, 0x3FF, 0 );
129203Scognet            }
129203Scognet            break;
129203Scognet	 case float_round_to_zero:
129203Scognet	    break;
129203Scognet         case float_round_down:
129203Scognet            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
129203Scognet         case float_round_up:
129203Scognet            return
129203Scognet            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
129203Scognet        }
129203Scognet        return packFloat64( aSign, 0, 0 );
129203Scognet    }
129203Scognet    lastBitMask = 1;
129203Scognet    lastBitMask <<= 0x433 - aExp;
129203Scognet    roundBitsMask = lastBitMask - 1;
129203Scognet    z = a;
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    if ( roundingMode == float_round_nearest_even ) {
129203Scognet        z += lastBitMask>>1;
129203Scognet        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
129203Scognet    }
129203Scognet    else if ( roundingMode != float_round_to_zero ) {
129203Scognet        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
129203Scognet            z += roundBitsMask;
129203Scognet        }
129203Scognet    }
129203Scognet    z &= ~ roundBitsMask;
129203Scognet    if ( z != a ) float_exception_flags |= float_flag_inexact;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the absolute values of the double-precision
129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
129203Scognet{
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig;
129203Scognet    int16 expDiff;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    bSig = extractFloat64Frac( b );
129203Scognet    bExp = extractFloat64Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    aSig <<= 9;
129203Scognet    bSig <<= 9;
129203Scognet    if ( 0 < expDiff ) {
129203Scognet        if ( aExp == 0x7FF ) {
129203Scognet            if ( aSig ) return propagateFloat64NaN( a, b );
129203Scognet            return a;
129203Scognet        }
129203Scognet        if ( bExp == 0 ) {
129203Scognet            --expDiff;
129203Scognet        }
129203Scognet        else {
129203Scognet            bSig |= LIT64( 0x2000000000000000 );
129203Scognet        }
129203Scognet        shift64RightJamming( bSig, expDiff, &bSig );
129203Scognet        zExp = aExp;
129203Scognet    }
129203Scognet    else if ( expDiff < 0 ) {
129203Scognet        if ( bExp == 0x7FF ) {
129203Scognet            if ( bSig ) return propagateFloat64NaN( a, b );
129203Scognet            return packFloat64( zSign, 0x7FF, 0 );
129203Scognet        }
129203Scognet        if ( aExp == 0 ) {
129203Scognet            ++expDiff;
129203Scognet        }
129203Scognet        else {
129203Scognet            aSig |= LIT64( 0x2000000000000000 );
129203Scognet        }
129203Scognet        shift64RightJamming( aSig, - expDiff, &aSig );
129203Scognet        zExp = bExp;
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( aExp == 0x7FF ) {
129203Scognet            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
129203Scognet            return a;
129203Scognet        }
129203Scognet        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
129203Scognet        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
129203Scognet        zExp = aExp;
129203Scognet        goto roundAndPack;
129203Scognet    }
129203Scognet    aSig |= LIT64( 0x2000000000000000 );
129203Scognet    zSig = ( aSig + bSig )<<1;
129203Scognet    --zExp;
129203Scognet    if ( (sbits64) zSig < 0 ) {
129203Scognet        zSig = aSig + bSig;
129203Scognet        ++zExp;
129203Scognet    }
129203Scognet roundAndPack:
129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the absolute values of the double-
129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
129203Scognet{
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig;
129203Scognet    int16 expDiff;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    bSig = extractFloat64Frac( b );
129203Scognet    bExp = extractFloat64Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    aSig <<= 10;
129203Scognet    bSig <<= 10;
129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float64_default_nan;
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        aExp = 1;
129203Scognet        bExp = 1;
129203Scognet    }
129203Scognet    if ( bSig < aSig ) goto aBigger;
129203Scognet    if ( aSig < bSig ) goto bBigger;
129203Scognet    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
129203Scognet bExpBigger:
129203Scognet    if ( bExp == 0x7FF ) {
129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
129203Scognet        return packFloat64( zSign ^ 1, 0x7FF, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        ++expDiff;
129203Scognet    }
129203Scognet    else {
129203Scognet        aSig |= LIT64( 0x4000000000000000 );
129203Scognet    }
129203Scognet    shift64RightJamming( aSig, - expDiff, &aSig );
129203Scognet    bSig |= LIT64( 0x4000000000000000 );
129203Scognet bBigger:
129203Scognet    zSig = bSig - aSig;
129203Scognet    zExp = bExp;
129203Scognet    zSign ^= 1;
129203Scognet    goto normalizeRoundAndPack;
129203Scognet aExpBigger:
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig ) return propagateFloat64NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        --expDiff;
129203Scognet    }
129203Scognet    else {
129203Scognet        bSig |= LIT64( 0x4000000000000000 );
129203Scognet    }
129203Scognet    shift64RightJamming( bSig, expDiff, &bSig );
129203Scognet    aSig |= LIT64( 0x4000000000000000 );
129203Scognet aBigger:
129203Scognet    zSig = aSig - bSig;
129203Scognet    zExp = aExp;
129203Scognet normalizeRoundAndPack:
129203Scognet    --zExp;
129203Scognet    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the double-precision floating-point values `a'
129203Scognetand `b'.  The operation is performed according to the IEC/IEEE Standard for
129203ScognetBinary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float64_add( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return addFloat64Sigs( a, b, aSign );
129203Scognet    }
129203Scognet    else {
129203Scognet        return subFloat64Sigs( a, b, aSign );
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the double-precision floating-point values
129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
129203Scognetfor Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float64_sub( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return subFloat64Sigs( a, b, aSign );
129203Scognet    }
129203Scognet    else {
129203Scognet        return addFloat64Sigs( a, b, aSign );
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of multiplying the double-precision floating-point values
129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
129203Scognetfor Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float64_mul( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSig = extractFloat64Frac( b );
129203Scognet    bExp = extractFloat64Exp( b );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
129203Scognet            return propagateFloat64NaN( a, b );
129203Scognet        }
129203Scognet        if ( ( bExp | bSig ) == 0 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float64_default_nan;
129203Scognet        }
129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0x7FF ) {
129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
129203Scognet        if ( ( aExp | aSig ) == 0 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float64_default_nan;
129203Scognet        }
129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    zExp = aExp + bExp - 0x3FF;
129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
129203Scognet    mul64To128( aSig, bSig, &zSig0, &zSig1 );
129203Scognet    zSig0 |= ( zSig1 != 0 );
129203Scognet    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
129203Scognet        zSig0 <<= 1;
129203Scognet        --zExp;
129203Scognet    }
129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of dividing the double-precision floating-point value `a'
129203Scognetby the corresponding value `b'.  The operation is performed according to
129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float64_div( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int16 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig;
129203Scognet    bits64 rem0, rem1;
129203Scognet    bits64 term0, term1;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSig = extractFloat64Frac( b );
129203Scognet    bExp = extractFloat64Exp( b );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig ) return propagateFloat64NaN( a, b );
129203Scognet        if ( bExp == 0x7FF ) {
129203Scognet            if ( bSig ) return propagateFloat64NaN( a, b );
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float64_default_nan;
129203Scognet        }
129203Scognet        return packFloat64( zSign, 0x7FF, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0x7FF ) {
129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
129203Scognet        return packFloat64( zSign, 0, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) {
129203Scognet            if ( ( aExp | aSig ) == 0 ) {
129203Scognet                float_raise( float_flag_invalid );
129203Scognet                return float64_default_nan;
129203Scognet            }
129203Scognet            float_raise( float_flag_divbyzero );
129203Scognet            return packFloat64( zSign, 0x7FF, 0 );
129203Scognet        }
129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    zExp = aExp - bExp + 0x3FD;
129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
129203Scognet    if ( bSig <= ( aSig + aSig ) ) {
129203Scognet        aSig >>= 1;
129203Scognet        ++zExp;
129203Scognet    }
129203Scognet    zSig = estimateDiv128To64( aSig, 0, bSig );
129203Scognet    if ( ( zSig & 0x1FF ) <= 2 ) {
129203Scognet        mul64To128( bSig, zSig, &term0, &term1 );
129203Scognet        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
129203Scognet        while ( (sbits64) rem0 < 0 ) {
129203Scognet            --zSig;
129203Scognet            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
129203Scognet        }
129203Scognet        zSig |= ( rem1 != 0 );
129203Scognet    }
129203Scognet    return roundAndPackFloat64( zSign, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the remainder of the double-precision floating-point value `a'
129203Scognetwith respect to the corresponding value `b'.  The operation is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float64_rem( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int16 aExp, bExp, expDiff;
129203Scognet    bits64 aSig, bSig;
129203Scognet    bits64 q, alternateASig;
129203Scognet    sbits64 sigMean;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSig = extractFloat64Frac( b );
129203Scognet    bExp = extractFloat64Exp( b );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
129203Scognet            return propagateFloat64NaN( a, b );
129203Scognet        }
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float64_default_nan;
129203Scognet    }
129203Scognet    if ( bExp == 0x7FF ) {
129203Scognet        if ( bSig ) return propagateFloat64NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            return float64_default_nan;
129203Scognet        }
129203Scognet        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return a;
129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    expDiff = aExp - bExp;
129203Scognet    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
129203Scognet    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
129203Scognet    if ( expDiff < 0 ) {
129203Scognet        if ( expDiff < -1 ) return a;
129203Scognet        aSig >>= 1;
129203Scognet    }
129203Scognet    q = ( bSig <= aSig );
129203Scognet    if ( q ) aSig -= bSig;
129203Scognet    expDiff -= 64;
129203Scognet    while ( 0 < expDiff ) {
129203Scognet        q = estimateDiv128To64( aSig, 0, bSig );
129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
129203Scognet        aSig = - ( ( bSig>>2 ) * q );
129203Scognet        expDiff -= 62;
129203Scognet    }
129203Scognet    expDiff += 64;
129203Scognet    if ( 0 < expDiff ) {
129203Scognet        q = estimateDiv128To64( aSig, 0, bSig );
129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
129203Scognet        q >>= 64 - expDiff;
129203Scognet        bSig >>= 2;
129203Scognet        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
129203Scognet    }
129203Scognet    else {
129203Scognet        aSig >>= 2;
129203Scognet        bSig >>= 2;
129203Scognet    }
129203Scognet    do {
129203Scognet        alternateASig = aSig;
129203Scognet        ++q;
129203Scognet        aSig -= bSig;
129203Scognet    } while ( 0 <= (sbits64) aSig );
129203Scognet    sigMean = aSig + alternateASig;
129203Scognet    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
129203Scognet        aSig = alternateASig;
129203Scognet    }
129203Scognet    zSign = ( (sbits64) aSig < 0 );
129203Scognet    if ( zSign ) aSig = - aSig;
129203Scognet    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the square root of the double-precision floating-point value `a'.
129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float64_sqrt( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, zExp;
129203Scognet    bits64 aSig, zSig, doubleZSig;
129203Scognet    bits64 rem0, rem1, term0, term1;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    if ( aExp == 0x7FF ) {
129203Scognet        if ( aSig ) return propagateFloat64NaN( a, a );
129203Scognet        if ( ! aSign ) return a;
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float64_default_nan;
129203Scognet    }
129203Scognet    if ( aSign ) {
129203Scognet        if ( ( aExp | aSig ) == 0 ) return a;
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return float64_default_nan;
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return 0;
129203Scognet        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
129203Scognet    aSig |= LIT64( 0x0010000000000000 );
129203Scognet    zSig = estimateSqrt32( aExp, aSig>>21 );
129203Scognet    aSig <<= 9 - ( aExp & 1 );
129203Scognet    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
129203Scognet    if ( ( zSig & 0x1FF ) <= 5 ) {
129203Scognet        doubleZSig = zSig<<1;
129203Scognet        mul64To128( zSig, zSig, &term0, &term1 );
129203Scognet        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
129203Scognet        while ( (sbits64) rem0 < 0 ) {
129203Scognet            --zSig;
129203Scognet            doubleZSig -= 2;
129203Scognet            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
129203Scognet        }
129203Scognet        zSig |= ( ( rem0 | rem1 ) != 0 );
129203Scognet    }
129203Scognet    return roundAndPackFloat64( 0, zExp, zSig );
129203Scognet
129203Scognet}
129203Scognet#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the
129203Scognetcorresponding value `b', and 0 otherwise.  The comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float64_eq( float64 a, float64 b )
129203Scognet{
129203Scognet
129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
129203Scognet       ) {
129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return ( a == b ) ||
129203Scognet	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or
129203Scognetequal to the corresponding value `b', and 0 otherwise.  The comparison is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float64_le( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    if ( aSign != bSign )
129203Scognet	return aSign ||
129203Scognet	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
129203Scognet	      0 );
129203Scognet    return ( a == b ) ||
129203Scognet	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the double-precision floating-point value `a' is less than
129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float64_lt( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    if ( aSign != bSign )
129203Scognet	return aSign &&
129203Scognet	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
129203Scognet	      0 );
129203Scognet    return ( a != b ) &&
129203Scognet	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifndef SOFTFLOAT_FOR_GCC
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the
129203Scognetcorresponding value `b', and 0 otherwise.  The invalid exception is raised
129203Scognetif either operand is a NaN.  Otherwise, the comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float64_eq_signaling( float64 a, float64 b )
129203Scognet{
129203Scognet
129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or
129203Scognetequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float64_le_quiet( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
129203Scognet       ) {
129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
129203Scognet    return ( a == b ) || ( aSign ^ ( a < b ) );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the double-precision floating-point value `a' is less than
129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float64_lt_quiet( float64 a, float64 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
129203Scognet         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
129203Scognet       ) {
129203Scognet        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet    bSign = extractFloat64Sign( b );
129203Scognet    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
129203Scognet    return ( a != b ) && ( aSign ^ ( a < b ) );
129203Scognet
129203Scognet}
129203Scognet#endif
129203Scognet
129203Scognet#ifdef FLOATX80
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the extended double-precision floating-
129203Scognetpoint value `a' to the 32-bit two's complement integer format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic---which means in particular that the conversion
129203Scognetis rounded according to the current rounding mode.  If `a' is a NaN, the
129203Scognetlargest positive integer is returned.  Otherwise, if the conversion
129203Scognetoverflows, the largest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 floatx80_to_int32( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
129203Scognet    shiftCount = 0x4037 - aExp;
129203Scognet    if ( shiftCount <= 0 ) shiftCount = 1;
129203Scognet    shift64RightJamming( aSig, shiftCount, &aSig );
129203Scognet    return roundAndPackInt32( aSign, aSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the extended double-precision floating-
129203Scognetpoint value `a' to the 32-bit two's complement integer format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded
129203Scognettoward zero.  If `a' is a NaN, the largest positive integer is returned.
129203ScognetOtherwise, if the conversion overflows, the largest integer with the same
129203Scognetsign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 floatx80_to_int32_round_to_zero( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig, savedASig;
129203Scognet    int32 z;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    if ( 0x401E < aExp ) {
129203Scognet        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
129203Scognet        goto invalid;
129203Scognet    }
129203Scognet    else if ( aExp < 0x3FFF ) {
129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    shiftCount = 0x403E - aExp;
129203Scognet    savedASig = aSig;
129203Scognet    aSig >>= shiftCount;
129203Scognet    z = aSig;
129203Scognet    if ( aSign ) z = - z;
129203Scognet    if ( ( z < 0 ) ^ aSign ) {
129203Scognet invalid:
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
129203Scognet    }
129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the extended double-precision floating-
129203Scognetpoint value `a' to the 64-bit two's complement integer format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic---which means in particular that the conversion
129203Scognetis rounded according to the current rounding mode.  If `a' is a NaN,
129203Scognetthe largest positive integer is returned.  Otherwise, if the conversion
129203Scognetoverflows, the largest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 floatx80_to_int64( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig, aSigExtra;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    shiftCount = 0x403E - aExp;
129203Scognet    if ( shiftCount <= 0 ) {
129203Scognet        if ( shiftCount ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            if (    ! aSign
129203Scognet                 || (    ( aExp == 0x7FFF )
129203Scognet                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
129203Scognet               ) {
129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet            }
129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet        }
129203Scognet        aSigExtra = 0;
129203Scognet    }
129203Scognet    else {
129203Scognet        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
129203Scognet    }
129203Scognet    return roundAndPackInt64( aSign, aSig, aSigExtra );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the extended double-precision floating-
129203Scognetpoint value `a' to the 64-bit two's complement integer format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded
129203Scognettoward zero.  If `a' is a NaN, the largest positive integer is returned.
129203ScognetOtherwise, if the conversion overflows, the largest integer with the same
129203Scognetsign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 floatx80_to_int64_round_to_zero( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig;
129203Scognet    int64 z;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    shiftCount = aExp - 0x403E;
129203Scognet    if ( 0 <= shiftCount ) {
129203Scognet        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet        if ( ( a.high != 0xC03E ) || aSig ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet            }
129203Scognet        }
129203Scognet        return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet    }
129203Scognet    else if ( aExp < 0x3FFF ) {
129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    z = aSig>>( - shiftCount );
129203Scognet    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    if ( aSign ) z = - z;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the extended double-precision floating-
129203Scognetpoint value `a' to the single-precision floating-point format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 floatx80_to_float32( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp;
129203Scognet    bits64 aSig;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( aSig<<1 ) ) {
129203Scognet            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
129203Scognet        }
129203Scognet        return packFloat32( aSign, 0xFF, 0 );
129203Scognet    }
129203Scognet    shift64RightJamming( aSig, 33, &aSig );
129203Scognet    if ( aExp || aSig ) aExp -= 0x3F81;
129203Scognet    return roundAndPackFloat32( aSign, aExp, aSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the extended double-precision floating-
129203Scognetpoint value `a' to the double-precision floating-point format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 floatx80_to_float64( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp;
129203Scognet    bits64 aSig, zSig;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( aSig<<1 ) ) {
129203Scognet            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
129203Scognet        }
129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
129203Scognet    }
129203Scognet    shift64RightJamming( aSig, 1, &zSig );
129203Scognet    if ( aExp || aSig ) aExp -= 0x3C01;
129203Scognet    return roundAndPackFloat64( aSign, aExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifdef FLOAT128
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the extended double-precision floating-
129203Scognetpoint value `a' to the quadruple-precision floating-point format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 floatx80_to_float128( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp;
129203Scognet    bits64 aSig, zSig0, zSig1;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
129203Scognet        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
129203Scognet    }
129203Scognet    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
129203Scognet    return packFloat128( aSign, aExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetRounds the extended double-precision floating-point value `a' to an integer,
129203Scognetand returns the result as an extended quadruple-precision floating-point
129203Scognetvalue.  The operation is performed according to the IEC/IEEE Standard for
129203ScognetBinary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 floatx80_round_to_int( floatx80 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp;
129203Scognet    bits64 lastBitMask, roundBitsMask;
129203Scognet    int8 roundingMode;
129203Scognet    floatx80 z;
129203Scognet
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    if ( 0x403E <= aExp ) {
129203Scognet        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
129203Scognet            return propagateFloatx80NaN( a, a );
129203Scognet        }
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( aExp < 0x3FFF ) {
129203Scognet        if (    ( aExp == 0 )
129203Scognet             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
129203Scognet            return a;
129203Scognet        }
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet        aSign = extractFloatx80Sign( a );
129203Scognet        switch ( float_rounding_mode ) {
129203Scognet         case float_round_nearest_even:
129203Scognet            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
129203Scognet               ) {
129203Scognet                return
129203Scognet                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
129203Scognet            }
129203Scognet            break;
129203Scognet	 case float_round_to_zero:
129203Scognet	    break;
129203Scognet         case float_round_down:
129203Scognet            return
129203Scognet                  aSign ?
129203Scognet                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
129203Scognet                : packFloatx80( 0, 0, 0 );
129203Scognet         case float_round_up:
129203Scognet            return
129203Scognet                  aSign ? packFloatx80( 1, 0, 0 )
129203Scognet                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
129203Scognet        }
129203Scognet        return packFloatx80( aSign, 0, 0 );
129203Scognet    }
129203Scognet    lastBitMask = 1;
129203Scognet    lastBitMask <<= 0x403E - aExp;
129203Scognet    roundBitsMask = lastBitMask - 1;
129203Scognet    z = a;
129203Scognet    roundingMode = float_rounding_mode;
129203Scognet    if ( roundingMode == float_round_nearest_even ) {
129203Scognet        z.low += lastBitMask>>1;
129203Scognet        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
129203Scognet    }
129203Scognet    else if ( roundingMode != float_round_to_zero ) {
129203Scognet        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
129203Scognet            z.low += roundBitsMask;
129203Scognet        }
129203Scognet    }
129203Scognet    z.low &= ~ roundBitsMask;
129203Scognet    if ( z.low == 0 ) {
129203Scognet        ++z.high;
129203Scognet        z.low = LIT64( 0x8000000000000000 );
129203Scognet    }
129203Scognet    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the absolute values of the extended double-
129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
129203Scognetnegated before being returned.  `zSign' is ignored if the result is a NaN.
129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
129203Scognet{
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
129203Scognet    int32 expDiff;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    bSig = extractFloatx80Frac( b );
129203Scognet    bExp = extractFloatx80Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    if ( 0 < expDiff ) {
129203Scognet        if ( aExp == 0x7FFF ) {
129203Scognet            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet            return a;
129203Scognet        }
129203Scognet        if ( bExp == 0 ) --expDiff;
129203Scognet        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
129203Scognet        zExp = aExp;
129203Scognet    }
129203Scognet    else if ( expDiff < 0 ) {
129203Scognet        if ( bExp == 0x7FFF ) {
129203Scognet            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet        }
129203Scognet        if ( aExp == 0 ) ++expDiff;
129203Scognet        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
129203Scognet        zExp = bExp;
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( aExp == 0x7FFF ) {
129203Scognet            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
129203Scognet                return propagateFloatx80NaN( a, b );
129203Scognet            }
129203Scognet            return a;
129203Scognet        }
129203Scognet        zSig1 = 0;
129203Scognet        zSig0 = aSig + bSig;
129203Scognet        if ( aExp == 0 ) {
129203Scognet            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
129203Scognet            goto roundAndPack;
129203Scognet        }
129203Scognet        zExp = aExp;
129203Scognet        goto shiftRight1;
129203Scognet    }
129203Scognet    zSig0 = aSig + bSig;
129203Scognet    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
129203Scognet shiftRight1:
129203Scognet    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
129203Scognet    zSig0 |= LIT64( 0x8000000000000000 );
129203Scognet    ++zExp;
129203Scognet roundAndPack:
129203Scognet    return
129203Scognet        roundAndPackFloatx80(
129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the absolute values of the extended
129203Scognetdouble-precision floating-point values `a' and `b'.  If `zSign' is 1, the
129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
129203Scognet{
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
129203Scognet    int32 expDiff;
129203Scognet    floatx80 z;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    bSig = extractFloatx80Frac( b );
129203Scognet    bExp = extractFloatx80Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
129203Scognet            return propagateFloatx80NaN( a, b );
129203Scognet        }
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        z.low = floatx80_default_nan_low;
129203Scognet        z.high = floatx80_default_nan_high;
129203Scognet        return z;
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        aExp = 1;
129203Scognet        bExp = 1;
129203Scognet    }
129203Scognet    zSig1 = 0;
129203Scognet    if ( bSig < aSig ) goto aBigger;
129203Scognet    if ( aSig < bSig ) goto bBigger;
129203Scognet    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
129203Scognet bExpBigger:
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) ++expDiff;
129203Scognet    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
129203Scognet bBigger:
129203Scognet    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
129203Scognet    zExp = bExp;
129203Scognet    zSign ^= 1;
129203Scognet    goto normalizeRoundAndPack;
129203Scognet aExpBigger:
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) --expDiff;
129203Scognet    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
129203Scognet aBigger:
129203Scognet    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
129203Scognet    zExp = aExp;
129203Scognet normalizeRoundAndPack:
129203Scognet    return
129203Scognet        normalizeRoundAndPackFloatx80(
129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the extended double-precision floating-point
129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 floatx80_add( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return addFloatx80Sigs( a, b, aSign );
129203Scognet    }
129203Scognet    else {
129203Scognet        return subFloatx80Sigs( a, b, aSign );
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the extended double-precision floating-
129203Scognetpoint values `a' and `b'.  The operation is performed according to the
129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 floatx80_sub( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return subFloatx80Sigs( a, b, aSign );
129203Scognet    }
129203Scognet    else {
129203Scognet        return addFloatx80Sigs( a, b, aSign );
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of multiplying the extended double-precision floating-
129203Scognetpoint values `a' and `b'.  The operation is performed according to the
129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 floatx80_mul( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
129203Scognet    floatx80 z;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSig = extractFloatx80Frac( b );
129203Scognet    bExp = extractFloatx80Exp( b );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if (    (bits64) ( aSig<<1 )
129203Scognet             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
129203Scognet            return propagateFloatx80NaN( a, b );
129203Scognet        }
129203Scognet        if ( ( bExp | bSig ) == 0 ) goto invalid;
129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet    }
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet        if ( ( aExp | aSig ) == 0 ) {
129203Scognet invalid:
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            z.low = floatx80_default_nan_low;
129203Scognet            z.high = floatx80_default_nan_high;
129203Scognet            return z;
129203Scognet        }
129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
129203Scognet        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    zExp = aExp + bExp - 0x3FFE;
129203Scognet    mul64To128( aSig, bSig, &zSig0, &zSig1 );
129203Scognet    if ( 0 < (sbits64) zSig0 ) {
129203Scognet        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
129203Scognet        --zExp;
129203Scognet    }
129203Scognet    return
129203Scognet        roundAndPackFloatx80(
129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of dividing the extended double-precision floating-point
129203Scognetvalue `a' by the corresponding value `b'.  The operation is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 floatx80_div( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig, bSig, zSig0, zSig1;
129203Scognet    bits64 rem0, rem1, rem2, term0, term1, term2;
129203Scognet    floatx80 z;
129203Scognet
129203Scognet    aSig = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSig = extractFloatx80Frac( b );
129203Scognet    bExp = extractFloatx80Exp( b );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet        if ( bExp == 0x7FFF ) {
129203Scognet            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet            goto invalid;
129203Scognet        }
129203Scognet        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet    }
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet        return packFloatx80( zSign, 0, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) {
129203Scognet            if ( ( aExp | aSig ) == 0 ) {
129203Scognet invalid:
129203Scognet                float_raise( float_flag_invalid );
129203Scognet                z.low = floatx80_default_nan_low;
129203Scognet                z.high = floatx80_default_nan_high;
129203Scognet                return z;
129203Scognet            }
129203Scognet            float_raise( float_flag_divbyzero );
129203Scognet            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet        }
129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
129203Scognet        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
129203Scognet    }
129203Scognet    zExp = aExp - bExp + 0x3FFE;
129203Scognet    rem1 = 0;
129203Scognet    if ( bSig <= aSig ) {
129203Scognet        shift128Right( aSig, 0, 1, &aSig, &rem1 );
129203Scognet        ++zExp;
129203Scognet    }
129203Scognet    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
129203Scognet    mul64To128( bSig, zSig0, &term0, &term1 );
129203Scognet    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
129203Scognet    while ( (sbits64) rem0 < 0 ) {
129203Scognet        --zSig0;
129203Scognet        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
129203Scognet    }
129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, bSig );
129203Scognet    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
129203Scognet        mul64To128( bSig, zSig1, &term1, &term2 );
129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
129203Scognet        while ( (sbits64) rem1 < 0 ) {
129203Scognet            --zSig1;
129203Scognet            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
129203Scognet        }
129203Scognet        zSig1 |= ( ( rem1 | rem2 ) != 0 );
129203Scognet    }
129203Scognet    return
129203Scognet        roundAndPackFloatx80(
129203Scognet            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the remainder of the extended double-precision floating-point value
129203Scognet`a' with respect to the corresponding value `b'.  The operation is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 floatx80_rem( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int32 aExp, bExp, expDiff;
129203Scognet    bits64 aSig0, aSig1, bSig;
129203Scognet    bits64 q, term0, term1, alternateASig0, alternateASig1;
129203Scognet    floatx80 z;
129203Scognet
129203Scognet    aSig0 = extractFloatx80Frac( a );
129203Scognet    aExp = extractFloatx80Exp( a );
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSig = extractFloatx80Frac( b );
129203Scognet    bExp = extractFloatx80Exp( b );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if (    (bits64) ( aSig0<<1 )
129203Scognet             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
129203Scognet            return propagateFloatx80NaN( a, b );
129203Scognet        }
129203Scognet        goto invalid;
129203Scognet    }
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( bSig == 0 ) {
129203Scognet invalid:
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            z.low = floatx80_default_nan_low;
129203Scognet            z.high = floatx80_default_nan_high;
129203Scognet            return z;
129203Scognet        }
129203Scognet        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
129203Scognet        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
129203Scognet    }
129203Scognet    bSig |= LIT64( 0x8000000000000000 );
129203Scognet    zSign = aSign;
129203Scognet    expDiff = aExp - bExp;
129203Scognet    aSig1 = 0;
129203Scognet    if ( expDiff < 0 ) {
129203Scognet        if ( expDiff < -1 ) return a;
129203Scognet        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
129203Scognet        expDiff = 0;
129203Scognet    }
129203Scognet    q = ( bSig <= aSig0 );
129203Scognet    if ( q ) aSig0 -= bSig;
129203Scognet    expDiff -= 64;
129203Scognet    while ( 0 < expDiff ) {
129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig );
129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
129203Scognet        mul64To128( bSig, q, &term0, &term1 );
129203Scognet        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
129203Scognet        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
129203Scognet        expDiff -= 62;
129203Scognet    }
129203Scognet    expDiff += 64;
129203Scognet    if ( 0 < expDiff ) {
129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig );
129203Scognet        q = ( 2 < q ) ? q - 2 : 0;
129203Scognet        q >>= 64 - expDiff;
129203Scognet        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
129203Scognet        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
129203Scognet        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
129203Scognet        while ( le128( term0, term1, aSig0, aSig1 ) ) {
129203Scognet            ++q;
129203Scognet            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
129203Scognet        }
129203Scognet    }
129203Scognet    else {
129203Scognet        term1 = 0;
129203Scognet        term0 = bSig;
129203Scognet    }
129203Scognet    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
129203Scognet    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
129203Scognet         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
129203Scognet              && ( q & 1 ) )
129203Scognet       ) {
129203Scognet        aSig0 = alternateASig0;
129203Scognet        aSig1 = alternateASig1;
129203Scognet        zSign = ! zSign;
129203Scognet    }
129203Scognet    return
129203Scognet        normalizeRoundAndPackFloatx80(
129203Scognet            80, zSign, bExp + expDiff, aSig0, aSig1 );
129203Scognet
129203Scognet}
129203Scognet
/*
-------------------------------------------------------------------------------
Returns the square root of the extended double-precision floating-point
value `a'.  The operation is performed according to the IEC/IEEE Standard
for Binary Floating-Point Arithmetic.

NaN inputs are propagated.  Positive infinity and zeros of either sign are
returned as-is (a positive zero is repacked as +0).  Negative infinity and
every other negative input raise the invalid exception and yield the default
NaN.  The result of the main path is always packed with a positive sign.
-------------------------------------------------------------------------------
*/
floatx80 floatx80_sqrt( floatx80 a )
{
    flag aSign;
    int32 aExp, zExp;
    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
    floatx80 z;

    aSig0 = extractFloatx80Frac( a );
    aExp = extractFloatx80Exp( a );
    aSign = extractFloatx80Sign( a );
    if ( aExp == 0x7FFF ) {
        /* NaN propagates; +inf is its own root; -inf is invalid. */
        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
        if ( ! aSign ) return a;
        goto invalid;
    }
    if ( aSign ) {
        /* -0 is returned unchanged; any other negative value is invalid. */
        if ( ( aExp | aSig0 ) == 0 ) return a;
 invalid:
        float_raise( float_flag_invalid );
        z.low = floatx80_default_nan_low;
        z.high = floatx80_default_nan_high;
        return z;
    }
    if ( aExp == 0 ) {
        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
    }
    /* Halve the unbiased exponent, then re-apply the bias. */
    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
    /* Seed from the top 32 significand bits, then refine by a division. */
    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
    /* Shift by 2 or 3 depending on the parity of the exponent. */
    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
    doubleZSig0 = zSig0<<1;
    /* rem = a - zSig0^2; step zSig0 down while the remainder is negative. */
    mul64To128( zSig0, zSig0, &term0, &term1 );
    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
    while ( (sbits64) rem0 < 0 ) {
        --zSig0;
        doubleZSig0 -= 2;
        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
    }
    /* Estimate the next 64 result bits from the remainder. */
    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
    /*
     * Only when the low bits of the estimate are small (<= 5 after masking
     * off the top two bits) is the remainder computed exactly and a sticky
     * bit recorded.
     * NOTE(review): presumably this is the only case in which the estimate's
     * error could change the rounding -- confirm against the error bound of
     * estimateDiv128To64.
     */
    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
        if ( zSig1 == 0 ) zSig1 = 1;
        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
        mul64To128( zSig1, zSig1, &term2, &term3 );
        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
        while ( (sbits64) rem1 < 0 ) {
            --zSig1;
            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
            term3 |= 1;
            term2 |= doubleZSig0;
            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
        }
        /* Any nonzero remainder makes the result inexact: set sticky bit. */
        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
    }
    /* Reassemble the 128-bit root from doubleZSig0 and the shifted zSig1. */
    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
    zSig0 |= doubleZSig0;
    return
        roundAndPackFloatx80(
            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );

}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
129203Scognetequal to the corresponding value `b', and 0 otherwise.  The comparison is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag floatx80_eq( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet
129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
129203Scognet       ) {
129203Scognet        if (    floatx80_is_signaling_nan( a )
129203Scognet             || floatx80_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return
129203Scognet           ( a.low == b.low )
129203Scognet        && (    ( a.high == b.high )
129203Scognet             || (    ( a.low == 0 )
129203Scognet                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
129203Scognet           );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
129203Scognetless than or equal to the corresponding value `b', and 0 otherwise.  The
129203Scognetcomparison is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag floatx80_le( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
129203Scognet                 == 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
129203Scognet        : le128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the extended double-precision floating-point value `a' is
129203Scognetless than the corresponding value `b', and 0 otherwise.  The comparison
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag floatx80_lt( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
129203Scognet                 != 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
129203Scognet        : lt128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the extended double-precision floating-point value `a' is equal
129203Scognetto the corresponding value `b', and 0 otherwise.  The invalid exception is
129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag floatx80_eq_signaling( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet
129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return
129203Scognet           ( a.low == b.low )
129203Scognet        && (    ( a.high == b.high )
129203Scognet             || (    ( a.low == 0 )
129203Scognet                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
129203Scognet           );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less
129203Scognetthan or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
129203Scognetdo not cause an exception.  Otherwise, the comparison is performed according
129203Scognetto the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag floatx80_le_quiet( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
129203Scognet       ) {
129203Scognet        if (    floatx80_is_signaling_nan( a )
129203Scognet             || floatx80_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
129203Scognet                 == 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )
129203Scognet        : le128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less
129203Scognetthan the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
129203Scognetan exception.  Otherwise, the comparison is performed according to the
129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag floatx80_lt_quiet( floatx80 a, floatx80 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
129203Scognet         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
129203Scognet              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
129203Scognet       ) {
129203Scognet        if (    floatx80_is_signaling_nan( a )
129203Scognet             || floatx80_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloatx80Sign( a );
129203Scognet    bSign = extractFloatx80Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
129203Scognet                 != 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
129203Scognet        : lt128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet#ifdef FLOAT128
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the quadruple-precision floating-point
129203Scognetvalue `a' to the 32-bit two's complement integer format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic---which means in particular that the conversion is rounded
129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
129203Scognetlargest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 float128_to_int32( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig0, aSig1;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet    aSig0 |= ( aSig1 != 0 );
129203Scognet    shiftCount = 0x4028 - aExp;
129203Scognet    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
129203Scognet    return roundAndPackInt32( aSign, aSig0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the quadruple-precision floating-point
129203Scognetvalue `a' to the 32-bit two's complement integer format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
129203Scognet`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
129203Scognetconversion overflows, the largest integer with the same sign as `a' is
129203Scognetreturned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint32 float128_to_int32_round_to_zero( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig0, aSig1, savedASig;
129203Scognet    int32 z;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    aSig0 |= ( aSig1 != 0 );
129203Scognet    if ( 0x401E < aExp ) {
129203Scognet        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
129203Scognet        goto invalid;
129203Scognet    }
129203Scognet    else if ( aExp < 0x3FFF ) {
129203Scognet        if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet    shiftCount = 0x402F - aExp;
129203Scognet    savedASig = aSig0;
129203Scognet    aSig0 >>= shiftCount;
129203Scognet    z = aSig0;
129203Scognet    if ( aSign ) z = - z;
129203Scognet    if ( ( z < 0 ) ^ aSign ) {
129203Scognet invalid:
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
129203Scognet    }
129203Scognet    if ( ( aSig0<<shiftCount ) != savedASig ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the quadruple-precision floating-point
129203Scognetvalue `a' to the 64-bit two's complement integer format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic---which means in particular that the conversion is rounded
129203Scognetaccording to the current rounding mode.  If `a' is a NaN, the largest
129203Scognetpositive integer is returned.  Otherwise, if the conversion overflows, the
129203Scognetlargest integer with the same sign as `a' is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 float128_to_int64( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig0, aSig1;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet    shiftCount = 0x402F - aExp;
129203Scognet    if ( shiftCount <= 0 ) {
129203Scognet        if ( 0x403E < aExp ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            if (    ! aSign
129203Scognet                 || (    ( aExp == 0x7FFF )
129203Scognet                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
129203Scognet                    )
129203Scognet               ) {
129203Scognet                return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet            }
129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet        }
129203Scognet        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    else {
129203Scognet        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    return roundAndPackInt64( aSign, aSig0, aSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the quadruple-precision floating-point
129203Scognetvalue `a' to the 64-bit two's complement integer format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.
129203ScognetIf `a' is a NaN, the largest positive integer is returned.  Otherwise, if
129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is
129203Scognetreturned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetint64 float128_to_int64_round_to_zero( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, shiftCount;
129203Scognet    bits64 aSig0, aSig1;
129203Scognet    int64 z;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet    shiftCount = aExp - 0x402F;
129203Scognet    if ( 0 < shiftCount ) {
129203Scognet        if ( 0x403E <= aExp ) {
129203Scognet            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
129203Scognet            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
129203Scognet                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
129203Scognet                if ( aSig1 ) float_exception_flags |= float_flag_inexact;
129203Scognet            }
129203Scognet            else {
129203Scognet                float_raise( float_flag_invalid );
129203Scognet                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
129203Scognet                    return LIT64( 0x7FFFFFFFFFFFFFFF );
129203Scognet                }
129203Scognet            }
129203Scognet            return (sbits64) LIT64( 0x8000000000000000 );
129203Scognet        }
129203Scognet        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
129203Scognet        if ( (bits64) ( aSig1<<shiftCount ) ) {
129203Scognet            float_exception_flags |= float_flag_inexact;
129203Scognet        }
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( aExp < 0x3FFF ) {
129203Scognet            if ( aExp | aSig0 | aSig1 ) {
129203Scognet                float_exception_flags |= float_flag_inexact;
129203Scognet            }
129203Scognet            return 0;
129203Scognet        }
129203Scognet        z = aSig0>>( - shiftCount );
129203Scognet        if (    aSig1
129203Scognet             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
129203Scognet            float_exception_flags |= float_flag_inexact;
129203Scognet        }
129203Scognet    }
129203Scognet    if ( aSign ) z = - z;
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
    && defined(SOFTFLOAT_NEED_FIXUNS)
/*
 * Converts the quadruple-precision floating-point value `a' to a 64-bit
 * unsigned integer, always rounding toward zero.  The structure mirrors
 * float128_to_int64_round_to_zero, but signed overflow is not checked:
 *
 *  - If the exponent field is 0x403F or larger (which includes infinities
 *    and NaNs), the invalid exception is raised and all-ones is returned.
 *  - If the exponent field is below 0x3FFF the result is 0, with the inexact
 *    flag set unless the input was itself a zero.
 *  - Otherwise the truncated magnitude is returned, with the inexact flag
 *    set if any fraction bits were discarded.  For an input with the sign
 *    bit set the magnitude is negated in unsigned arithmetic; no exception
 *    is raised for that case.
 */
uint64 float128_to_uint64_round_to_zero( float128 a )
{
    flag aSign;
    int32 aExp, shiftCount;
    bits64 aSig0, aSig1;
    uint64 z;

    aSig1 = extractFloat128Frac1( a );
    aSig0 = extractFloat128Frac0( a );
    aExp = extractFloat128Exp( a );
    aSign = extractFloat128Sign( a );
    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
    shiftCount = aExp - 0x402F;
    if ( 0 < shiftCount ) {
        if ( 0x403F <= aExp ) {
            /*
             * The magnitude cannot fit in 64 bits.  The signed variant
             * special-cases a.high == 0xC03E000000000000 here, but that
             * encoding has exponent 0x403E and so can never reach this
             * branch; the test was dead code and has been dropped.
             */
            float_raise( float_flag_invalid );
            return LIT64( 0xFFFFFFFFFFFFFFFF );
        }
        /* Integer part spans both words; leftover low bits => inexact. */
        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
        if ( (bits64) ( aSig1<<shiftCount ) ) {
            float_exception_flags |= float_flag_inexact;
        }
    }
    else {
        if ( aExp < 0x3FFF ) {
            if ( aExp | aSig0 | aSig1 ) {
                float_exception_flags |= float_flag_inexact;
            }
            return 0;
        }
        /* Integer part lies wholly in the high word. */
        z = aSig0>>( - shiftCount );
        if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
            float_exception_flags |= float_flag_inexact;
        }
    }
    if ( aSign ) z = - z;
    return z;

}
#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
230363Sdas
230363Sdas/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the quadruple-precision floating-point
129203Scognetvalue `a' to the single-precision floating-point format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat32 float128_to_float32( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp;
129203Scognet    bits64 aSig0, aSig1;
129203Scognet    bits32 zSig;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( aSig0 | aSig1 ) {
129203Scognet            return commonNaNToFloat32( float128ToCommonNaN( a ) );
129203Scognet        }
129203Scognet        return packFloat32( aSign, 0xFF, 0 );
129203Scognet    }
129203Scognet    aSig0 |= ( aSig1 != 0 );
129203Scognet    shift64RightJamming( aSig0, 18, &aSig0 );
129203Scognet    zSig = aSig0;
129203Scognet    if ( aExp || zSig ) {
129203Scognet        zSig |= 0x40000000;
129203Scognet        aExp -= 0x3F81;
129203Scognet    }
129203Scognet    return roundAndPackFloat32( aSign, aExp, zSig );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the quadruple-precision floating-point
129203Scognetvalue `a' to the double-precision floating-point format.  The conversion
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat64 float128_to_float64( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp;
129203Scognet    bits64 aSig0, aSig1;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( aSig0 | aSig1 ) {
129203Scognet            return commonNaNToFloat64( float128ToCommonNaN( a ) );
129203Scognet        }
129203Scognet        return packFloat64( aSign, 0x7FF, 0 );
129203Scognet    }
129203Scognet    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
129203Scognet    aSig0 |= ( aSig1 != 0 );
129203Scognet    if ( aExp || aSig0 ) {
129203Scognet        aSig0 |= LIT64( 0x4000000000000000 );
129203Scognet        aExp -= 0x3C01;
129203Scognet    }
129203Scognet    return roundAndPackFloat64( aSign, aExp, aSig0 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#ifdef FLOATX80
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the quadruple-precision floating-point
129203Scognetvalue `a' to the extended double-precision floating-point format.  The
129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloatx80 float128_to_floatx80( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp;
129203Scognet    bits64 aSig0, aSig1;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( aSig0 | aSig1 ) {
129203Scognet            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
129203Scognet        }
129203Scognet        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    else {
129203Scognet        aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet    }
129203Scognet    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
129203Scognet    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetRounds the quadruple-precision floating-point value `a' to an integer, and
129203Scognetreturns the result as a quadruple-precision floating-point value.  The
129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float128_round_to_int( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp;
129203Scognet    bits64 lastBitMask, roundBitsMask;
129203Scognet    int8 roundingMode;
129203Scognet    float128 z;
129203Scognet
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    if ( 0x402F <= aExp ) {
129203Scognet        if ( 0x406F <= aExp ) {
129203Scognet            if (    ( aExp == 0x7FFF )
129203Scognet                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
129203Scognet               ) {
129203Scognet                return propagateFloat128NaN( a, a );
129203Scognet            }
129203Scognet            return a;
129203Scognet        }
129203Scognet        lastBitMask = 1;
129203Scognet        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
129203Scognet        roundBitsMask = lastBitMask - 1;
129203Scognet        z = a;
129203Scognet        roundingMode = float_rounding_mode;
129203Scognet        if ( roundingMode == float_round_nearest_even ) {
129203Scognet            if ( lastBitMask ) {
129203Scognet                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
129203Scognet                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
129203Scognet            }
129203Scognet            else {
129203Scognet                if ( (sbits64) z.low < 0 ) {
129203Scognet                    ++z.high;
129203Scognet                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
129203Scognet                }
129203Scognet            }
129203Scognet        }
129203Scognet        else if ( roundingMode != float_round_to_zero ) {
129203Scognet            if (   extractFloat128Sign( z )
129203Scognet                 ^ ( roundingMode == float_round_up ) ) {
129203Scognet                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
129203Scognet            }
129203Scognet        }
129203Scognet        z.low &= ~ roundBitsMask;
129203Scognet    }
129203Scognet    else {
129203Scognet        if ( aExp < 0x3FFF ) {
129203Scognet            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
129203Scognet            float_exception_flags |= float_flag_inexact;
129203Scognet            aSign = extractFloat128Sign( a );
129203Scognet            switch ( float_rounding_mode ) {
129203Scognet             case float_round_nearest_even:
129203Scognet                if (    ( aExp == 0x3FFE )
129203Scognet                     && (   extractFloat128Frac0( a )
129203Scognet                          | extractFloat128Frac1( a ) )
129203Scognet                   ) {
129203Scognet                    return packFloat128( aSign, 0x3FFF, 0, 0 );
129203Scognet                }
129203Scognet                break;
129203Scognet	     case float_round_to_zero:
129203Scognet		break;
129203Scognet             case float_round_down:
129203Scognet                return
129203Scognet                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
129203Scognet                    : packFloat128( 0, 0, 0, 0 );
129203Scognet             case float_round_up:
129203Scognet                return
129203Scognet                      aSign ? packFloat128( 1, 0, 0, 0 )
129203Scognet                    : packFloat128( 0, 0x3FFF, 0, 0 );
129203Scognet            }
129203Scognet            return packFloat128( aSign, 0, 0, 0 );
129203Scognet        }
129203Scognet        lastBitMask = 1;
129203Scognet        lastBitMask <<= 0x402F - aExp;
129203Scognet        roundBitsMask = lastBitMask - 1;
129203Scognet        z.low = 0;
129203Scognet        z.high = a.high;
129203Scognet        roundingMode = float_rounding_mode;
129203Scognet        if ( roundingMode == float_round_nearest_even ) {
129203Scognet            z.high += lastBitMask>>1;
129203Scognet            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
129203Scognet                z.high &= ~ lastBitMask;
129203Scognet            }
129203Scognet        }
129203Scognet        else if ( roundingMode != float_round_to_zero ) {
129203Scognet            if (   extractFloat128Sign( z )
129203Scognet                 ^ ( roundingMode == float_round_up ) ) {
129203Scognet                z.high |= ( a.low != 0 );
129203Scognet                z.high += roundBitsMask;
129203Scognet            }
129203Scognet        }
129203Scognet        z.high &= ~ roundBitsMask;
129203Scognet    }
129203Scognet    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the absolute values of the quadruple-precision
129203Scognetfloating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
129203Scognetbefore being returned.  `zSign' is ignored if the result is a NaN.
129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.

Outline of the code below:  the significand of the operand with the smaller
exponent field is shifted right by the exponent difference (the third output
of `shift128ExtraRightJamming' is kept in `zSig2', presumably the bits that
were shifted out), the two significands are added, and the sum is handed to
`roundAndPackFloat128'.  If the larger exponent field is 0x7FFF the function
returns early:  through `propagateFloat128NaN' when the relevant fraction
bits are nonzero, otherwise with `a' itself or with a packed value that has
exponent 0x7FFF and a zero fraction.  If both exponent fields are zero the
plain sum of the two fractions is packed with exponent field 0 and returned
without going through `roundAndPackFloat128'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
129203Scognet{
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
129203Scognet    int32 expDiff;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    bSig1 = extractFloat128Frac1( b );
129203Scognet    bSig0 = extractFloat128Frac0( b );
129203Scognet    bExp = extractFloat128Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    if ( 0 < expDiff ) {  /* a has the larger exponent field */
129203Scognet        if ( aExp == 0x7FFF ) {
129203Scognet            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
129203Scognet            return a;  /* zero fraction: a is returned unchanged */
129203Scognet        }
129203Scognet        if ( bExp == 0 ) {
129203Scognet            --expDiff;  /* exponent field 0: shift one place less, no bit 48 */
129203Scognet        }
129203Scognet        else {
129203Scognet            bSig0 |= LIT64( 0x0001000000000000 );  /* bit 48; presumably the implicit leading 1 */
129203Scognet        }
129203Scognet        shift128ExtraRightJamming(
129203Scognet            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
129203Scognet        zExp = aExp;
129203Scognet    }
129203Scognet    else if ( expDiff < 0 ) {  /* b has the larger exponent field */
129203Scognet        if ( bExp == 0x7FFF ) {
129203Scognet            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );  /* sign comes from zSign, not from b */
129203Scognet        }
129203Scognet        if ( aExp == 0 ) {
129203Scognet            ++expDiff;  /* expDiff is negative here, so this also shortens the shift by one */
129203Scognet        }
129203Scognet        else {
129203Scognet            aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet        }
129203Scognet        shift128ExtraRightJamming(
129203Scognet            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
129203Scognet        zExp = bExp;
129203Scognet    }
129203Scognet    else {  /* equal exponent fields: no alignment shift */
129203Scognet        if ( aExp == 0x7FFF ) {
129203Scognet            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
129203Scognet                return propagateFloat128NaN( a, b );
129203Scognet            }
129203Scognet            return a;
129203Scognet        }
129203Scognet        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
129203Scognet        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );  /* both exponent fields are 0 */
129203Scognet        zSig2 = 0;
129203Scognet        zSig0 |= LIT64( 0x0002000000000000 );  /* bit 49; presumably the sum of the two leading bits */
129203Scognet        zExp = aExp;
129203Scognet        goto shiftRight1;
129203Scognet    }
129203Scognet    /* Reached from both unequal-exponent paths.  NOTE(review): on the path */
129203Scognet    /* where b is larger, aSig0 holds the shifted a and bSig0 never had bit */
129203Scognet    /* 48 set, so this OR presumably supplies b's leading bit -- confirm.   */
129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
129203Scognet    --zExp;
129203Scognet    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;  /* sum stayed below bit 49 */
129203Scognet    ++zExp;  /* sum reached bit 49: restore zExp and shift right by one */
129203Scognet shiftRight1:
129203Scognet    shift128ExtraRightJamming(
129203Scognet        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
129203Scognet roundAndPack:
129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the absolute values of the quadruple-
129203Scognetprecision floating-point values `a' and `b'.  If `zSign' is 1, the
129203Scognetdifference is negated before being returned.  `zSign' is ignored if the
129203Scognetresult is a NaN.  The subtraction is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.

Implementation notes, as coded below:  both fractions are first shifted left
by 14 bits, so on this scale the bit ORed in for an operand with a nonzero
exponent field is 0x4000000000000000.  If both exponent fields are 0x7FFF
and all fraction bits are zero, the invalid exception is raised and the
default NaN is returned.  If the operands have equal magnitude the result is
a zero whose sign bit is set only when `float_rounding_mode' equals
`float_round_down'.  When `b' has the larger magnitude the subtraction is
done as `b' minus `a' and `zSign' is inverted.  The difference is finished
by `normalizeRoundAndPackFloat128'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetstatic float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
129203Scognet{
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
129203Scognet    int32 expDiff;
129203Scognet    float128 z;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    bSig1 = extractFloat128Frac1( b );
129203Scognet    bSig0 = extractFloat128Frac0( b );
129203Scognet    bExp = extractFloat128Exp( b );
129203Scognet    expDiff = aExp - bExp;
129203Scognet    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );  /* work on fractions scaled up by 14 bits */
129203Scognet    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
129203Scognet    if ( 0 < expDiff ) goto aExpBigger;
129203Scognet    if ( expDiff < 0 ) goto bExpBigger;
129203Scognet    /* From here to the bExpBigger label the exponent fields are equal. */
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
129203Scognet            return propagateFloat128NaN( a, b );
129203Scognet        }
129203Scognet        float_raise( float_flag_invalid );  /* both fractions zero with exponent 0x7FFF */
129203Scognet        z.low = float128_default_nan_low;
129203Scognet        z.high = float128_default_nan_high;
129203Scognet        return z;
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {  /* both fields are 0 here; 1 is used as the working exponent */
129203Scognet        aExp = 1;
129203Scognet        bExp = 1;
129203Scognet    }
129203Scognet    /* Compare the magnitudes, high word first, to pick the subtraction order. */
129203Scognet    if ( bSig0 < aSig0 ) goto aBigger;
129203Scognet    if ( aSig0 < bSig0 ) goto bBigger;
129203Scognet    if ( bSig1 < aSig1 ) goto aBigger;
129203Scognet    if ( aSig1 < bSig1 ) goto bBigger;
129203Scognet    return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );  /* exact zero difference */
129203Scognet bExpBigger:
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
129203Scognet        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );  /* sign inverted because b dominates */
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        ++expDiff;  /* expDiff is negative here: shift a one place less */
129203Scognet    }
129203Scognet    else {
129203Scognet        aSig0 |= LIT64( 0x4000000000000000 );
129203Scognet    }
129203Scognet    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
129203Scognet    bSig0 |= LIT64( 0x4000000000000000 );
129203Scognet bBigger:
129203Scognet    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
129203Scognet    zExp = bExp;
129203Scognet    zSign ^= 1;
129203Scognet    goto normalizeRoundAndPack;
129203Scognet aExpBigger:
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        --expDiff;  /* shift b one place less when its exponent field is 0 */
129203Scognet    }
129203Scognet    else {
129203Scognet        bSig0 |= LIT64( 0x4000000000000000 );
129203Scognet    }
129203Scognet    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
129203Scognet    aSig0 |= LIT64( 0x4000000000000000 );
129203Scognet aBigger:
129203Scognet    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
129203Scognet    zExp = aExp;
129203Scognet normalizeRoundAndPack:
129203Scognet    --zExp;
129203Scognet    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );  /* the 14 presumably undoes the initial left shift -- confirm */
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of adding the quadruple-precision floating-point values
129203Scognet`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
129203Scognetfor Binary Floating-Point Arithmetic.

As coded below:  when the operands have the same sign bit their magnitudes
are added by `addFloat128Sigs'; otherwise the magnitude of `b' is subtracted
from that of `a' by `subFloat128Sigs'.  In both cases the sign of `a' is
passed as `zSign'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float128_add( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return addFloat128Sigs( a, b, aSign );  /* same sign: add magnitudes */
129203Scognet    }
129203Scognet    else {
129203Scognet        return subFloat128Sigs( a, b, aSign );  /* opposite signs: subtract magnitudes */
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of subtracting the quadruple-precision floating-point
129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.

As coded below:  when the operands have the same sign bit the magnitude of
`b' is subtracted from that of `a' by `subFloat128Sigs'; otherwise the two
magnitudes are added by `addFloat128Sigs'.  In both cases the sign of `a' is
passed as `zSign'.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float128_sub( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    if ( aSign == bSign ) {
129203Scognet        return subFloat128Sigs( a, b, aSign );  /* same sign: subtract magnitudes */
129203Scognet    }
129203Scognet    else {
129203Scognet        return addFloat128Sigs( a, b, aSign );  /* opposite signs: add magnitudes */
129203Scognet    }
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of multiplying the quadruple-precision floating-point
129203Scognetvalues `a' and `b'.  The operation is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.

Special cases handled before the multiplication, as coded below:  a NaN
operand is passed to `propagateFloat128NaN'; an operand with exponent field
0x7FFF and zero fraction multiplied by an operand whose exponent and
fraction are all zero raises the invalid exception and yields the default
NaN; any other operand with exponent field 0x7FFF yields a result packed
with exponent 0x7FFF and zero fraction; a zero operand yields a zero.  Every
value built with `packFloat128' or `roundAndPackFloat128' here carries the
exclusive OR of the two operand signs.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float128_mul( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
129203Scognet    float128 z;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSig1 = extractFloat128Frac1( b );
129203Scognet    bSig0 = extractFloat128Frac0( b );
129203Scognet    bExp = extractFloat128Exp( b );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if (    ( aSig0 | aSig1 )
129203Scognet             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
129203Scognet            return propagateFloat128NaN( a, b );
129203Scognet        }
129203Scognet        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;  /* b is zero */
129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
129203Scognet        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {  /* a is zero */
129203Scognet invalid:
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            z.low = float128_default_nan_low;
129203Scognet            z.high = float128_default_nan_high;
129203Scognet            return z;
129203Scognet        }
129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
129203Scognet    }
129203Scognet    zExp = aExp + bExp - 0x4000;
129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );  /* bit 48 is set for a only */
129203Scognet    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );  /* b's fraction, shifted up 16 bits */
129203Scognet    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
129203Scognet    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );  /* presumably accounts for b's leading bit, which was never set -- confirm */
129203Scognet    zSig2 |= ( zSig3 != 0 );  /* fold any nonzero lowest word into bit 0 of zSig2 */
129203Scognet    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {  /* product reached bit 49 */
129203Scognet        shift128ExtraRightJamming(
129203Scognet            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
129203Scognet        ++zExp;
129203Scognet    }
129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of dividing the quadruple-precision floating-point value
129203Scognet`a' by the corresponding value `b'.  The operation is performed according to
129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic.

Special cases handled before the division, as coded below:  a NaN operand is
passed to `propagateFloat128NaN'; if both operands have exponent field
0x7FFF with zero fractions, or both are zero, the invalid exception is
raised and the default NaN is returned; a nonzero `a' divided by a zero `b'
raises the divide-by-zero exception and returns a value packed with exponent
0x7FFF and zero fraction; `a' with exponent 0x7FFF and zero fraction gives
the same packed value without raising a flag; `b' with exponent 0x7FFF and
zero fraction, or a zero `a', gives a zero.  Every value built with
`packFloat128' or `roundAndPackFloat128' here carries the exclusive OR of
the two operand signs.

The quotient is built as two 64-bit words from `estimateDiv128To64', each
followed by a correction loop that decrements the word while the remainder
is negative.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float128_div( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int32 aExp, bExp, zExp;
129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
129203Scognet    float128 z;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSig1 = extractFloat128Frac1( b );
129203Scognet    bSig0 = extractFloat128Frac0( b );
129203Scognet    bExp = extractFloat128Exp( b );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    zSign = aSign ^ bSign;
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
129203Scognet        if ( bExp == 0x7FFF ) {
129203Scognet            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
129203Scognet            goto invalid;  /* both exponents 0x7FFF, both fractions zero */
129203Scognet        }
129203Scognet        return packFloat128( zSign, 0x7FFF, 0, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
129203Scognet        return packFloat128( zSign, 0, 0, 0 );
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) {  /* divisor is zero */
129203Scognet            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {  /* dividend is zero as well */
129203Scognet invalid:
129203Scognet                float_raise( float_flag_invalid );
129203Scognet                z.low = float128_default_nan_low;
129203Scognet                z.high = float128_default_nan_high;
129203Scognet                return z;
129203Scognet            }
129203Scognet            float_raise( float_flag_divbyzero );
129203Scognet            return packFloat128( zSign, 0x7FFF, 0, 0 );
129203Scognet        }
129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    zExp = aExp - bExp + 0x3FFD;
129203Scognet    /* Set bit 48 in each significand, then shift both left by 15 bits. */
129203Scognet    shortShift128Left(
129203Scognet        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
129203Scognet    shortShift128Left(
129203Scognet        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
129203Scognet    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {  /* divisor significand <= dividend significand */
129203Scognet        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
129203Scognet        ++zExp;
129203Scognet    }
129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );  /* upper quotient word (estimate) */
129203Scognet    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
129203Scognet    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
129203Scognet    while ( (sbits64) rem0 < 0 ) {  /* estimate too large: step down and add the divisor back */
129203Scognet        --zSig0;
129203Scognet        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
129203Scognet    }
129203Scognet    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );  /* lower quotient word (estimate) */
129203Scognet    if ( ( zSig1 & 0x3FFF ) <= 4 ) {  /* exact remainder is computed only when the low 14 bits are this small */
129203Scognet        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
129203Scognet        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
129203Scognet        while ( (sbits64) rem1 < 0 ) {
129203Scognet            --zSig1;
129203Scognet            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
129203Scognet        }
129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );  /* set bit 0 when a nonzero remainder is left */
129203Scognet    }
129203Scognet    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
129203Scognet    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the remainder of the quadruple-precision floating-point value `a'
129203Scognetwith respect to the corresponding value `b'.  The operation is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.

Special cases handled first, as coded below:  a NaN operand is passed to
`propagateFloat128NaN'; if `a' has exponent field 0x7FFF with a zero
fraction, or `b' is zero, the invalid exception is raised and the default
NaN is returned; if `b' has exponent field 0x7FFF with a zero fraction, or
`a' is zero, or the exponent of `a' is more than one below that of `b', `a'
is returned unchanged.

Otherwise quotient bits are produced in chunks with `estimateDiv128To64'
(each estimate is reduced by 4, or set to 0 when it is 4 or less) and the
matching multiple of `b' is subtracted from the running remainder.  A final
loop subtracts `b' until the remainder turns negative and then selects
between the last non-negative and the first negative candidate.  The sign of
the result is the sign of `a', inverted when the selected candidate is
negative.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float128_rem( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign, zSign;
129203Scognet    int32 aExp, bExp, expDiff;
129203Scognet    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
129203Scognet    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
129203Scognet    sbits64 sigMean0;
129203Scognet    float128 z;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSig1 = extractFloat128Frac1( b );
129203Scognet    bSig0 = extractFloat128Frac0( b );
129203Scognet    bExp = extractFloat128Exp( b );
129203Scognet    bSign = extractFloat128Sign( b );  /* bSign is not read again in this function */
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if (    ( aSig0 | aSig1 )
129203Scognet             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
129203Scognet            return propagateFloat128NaN( a, b );
129203Scognet        }
129203Scognet        goto invalid;  /* a has exponent 0x7FFF and a zero fraction */
129203Scognet    }
129203Scognet    if ( bExp == 0x7FFF ) {
129203Scognet        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
129203Scognet        return a;
129203Scognet    }
129203Scognet    if ( bExp == 0 ) {
129203Scognet        if ( ( bSig0 | bSig1 ) == 0 ) {  /* b is zero */
129203Scognet invalid:
129203Scognet            float_raise( float_flag_invalid );
129203Scognet            z.low = float128_default_nan_low;
129203Scognet            z.high = float128_default_nan_high;
129203Scognet            return z;
129203Scognet        }
129203Scognet        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return a;
129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    expDiff = aExp - bExp;
129203Scognet    if ( expDiff < -1 ) return a;
129203Scognet    /* a is shifted left by 15, or by 14 when expDiff is -1; b always by 15. */
129203Scognet    shortShift128Left(
129203Scognet        aSig0 | LIT64( 0x0001000000000000 ),
129203Scognet        aSig1,
129203Scognet        15 - ( expDiff < 0 ),
129203Scognet        &aSig0,
129203Scognet        &aSig1
129203Scognet    );
129203Scognet    shortShift128Left(
129203Scognet        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
129203Scognet    q = le128( bSig0, bSig1, aSig0, aSig1 );  /* first quotient bit: 1 when b's significand <= a's */
129203Scognet    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
129203Scognet    expDiff -= 64;
129203Scognet    while ( 0 < expDiff ) {  /* each pass lowers expDiff by 61 */
129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
129203Scognet        q = ( 4 < q ) ? q - 4 : 0;
129203Scognet        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
129203Scognet        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
129203Scognet        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
129203Scognet        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
129203Scognet        expDiff -= 61;
129203Scognet    }
129203Scognet    if ( -64 < expDiff ) {  /* a partial chunk of quotient bits remains */
129203Scognet        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
129203Scognet        q = ( 4 < q ) ? q - 4 : 0;
129203Scognet        q >>= - expDiff;  /* expDiff is in -63..0 here, so the shift count is 0..63 */
129203Scognet        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
129203Scognet        expDiff += 52;
129203Scognet        if ( expDiff < 0 ) {
129203Scognet            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
129203Scognet        }
129203Scognet        else {
129203Scognet            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
129203Scognet        }
129203Scognet        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
129203Scognet        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    else {
129203Scognet        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
129203Scognet        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
129203Scognet    }
129203Scognet    /* Subtract b until the remainder turns negative; the previous value is */
129203Scognet    /* kept in alternateASig0/1 and q counts the subtractions.              */
129203Scognet    do {
129203Scognet        alternateASig0 = aSig0;
129203Scognet        alternateASig1 = aSig1;
129203Scognet        ++q;
129203Scognet        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
129203Scognet    } while ( 0 <= (sbits64) aSig0 );
129203Scognet    /* sigMean is the sum of the negative and the non-negative candidate. */
129203Scognet    add128(
230363Sdas        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
129203Scognet    if (    ( sigMean0 < 0 )
129203Scognet         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {  /* negative sum, or zero sum with q odd */
129203Scognet        aSig0 = alternateASig0;
129203Scognet        aSig1 = alternateASig1;
129203Scognet    }
129203Scognet    zSign = ( (sbits64) aSig0 < 0 );
129203Scognet    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );  /* negate to obtain the magnitude */
129203Scognet    return
129203Scognet        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the square root of the quadruple-precision floating-point value `a'.
129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary
129203ScognetFloating-Point Arithmetic.

Special cases handled first, as coded below:  a NaN is passed to
`propagateFloat128NaN'; an operand with exponent field 0x7FFF, zero fraction
and a clear sign bit is returned unchanged; a zero of either sign is
returned (the negative zero as `a' itself, the positive one repacked); any
other operand with the sign bit set raises the invalid exception and
returns the default NaN.  The rounded result is always packed with sign 0.

The root is built as two 64-bit words:  the upper one from `estimateSqrt32'
refined with `estimateDiv128To64', the lower one from a second
`estimateDiv128To64' call, each followed by a correction loop that
decrements the word while the remainder is negative.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetfloat128 float128_sqrt( float128 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int32 aExp, zExp;
129203Scognet    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
129203Scognet    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
129203Scognet    float128 z;
129203Scognet
129203Scognet    aSig1 = extractFloat128Frac1( a );
129203Scognet    aSig0 = extractFloat128Frac0( a );
129203Scognet    aExp = extractFloat128Exp( a );
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    if ( aExp == 0x7FFF ) {
129203Scognet        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
129203Scognet        if ( ! aSign ) return a;
129203Scognet        goto invalid;  /* sign set, exponent 0x7FFF, zero fraction */
129203Scognet    }
129203Scognet    if ( aSign ) {
129203Scognet        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;  /* negative zero is returned as is */
129203Scognet invalid:
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        z.low = float128_default_nan_low;
129203Scognet        z.high = float128_default_nan_high;
129203Scognet        return z;
129203Scognet    }
129203Scognet    if ( aExp == 0 ) {
129203Scognet        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
129203Scognet        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
129203Scognet    }
129203Scognet    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
129203Scognet    aSig0 |= LIT64( 0x0001000000000000 );
129203Scognet    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
129203Scognet    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );  /* shift by 12 when aExp is odd, 13 when even */
129203Scognet    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
129203Scognet    doubleZSig0 = zSig0<<1;
129203Scognet    mul64To128( zSig0, zSig0, &term0, &term1 );
129203Scognet    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );  /* remainder = a - zSig0 squared */
129203Scognet    while ( (sbits64) rem0 < 0 ) {  /* estimate too large: step it down and fix the remainder */
129203Scognet        --zSig0;
129203Scognet        doubleZSig0 -= 2;
129203Scognet        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
129203Scognet    }
129203Scognet    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );  /* lower root word (estimate) */
129203Scognet    if ( ( zSig1 & 0x1FFF ) <= 5 ) {  /* exact remainder is computed only when the low 13 bits are this small */
129203Scognet        if ( zSig1 == 0 ) zSig1 = 1;
129203Scognet        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
129203Scognet        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
129203Scognet        mul64To128( zSig1, zSig1, &term2, &term3 );
129203Scognet        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
129203Scognet        while ( (sbits64) rem1 < 0 ) {
129203Scognet            --zSig1;
129203Scognet            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
129203Scognet            term3 |= 1;
129203Scognet            term2 |= doubleZSig0;
129203Scognet            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
129203Scognet        }
129203Scognet        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );  /* set bit 0 when a nonzero remainder is left */
129203Scognet    }
129203Scognet    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
129203Scognet    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to
129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.

As coded below:  if either operand is a NaN (exponent field 0x7FFF with a
nonzero fraction) the result is 0, and the invalid exception is raised only
when `float128_is_signaling_nan' reports one of the operands as signaling.
Otherwise the operands are equal when both words match, or when both
operands are zero regardless of their sign bits.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float128_eq( float128 a, float128 b )
129203Scognet{
129203Scognet
129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
129203Scognet       ) {
129203Scognet        if (    float128_is_signaling_nan( a )
129203Scognet             || float128_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return
129203Scognet           ( a.low == b.low )
129203Scognet        && (    ( a.high == b.high )
129203Scognet             || (    ( a.low == 0 )
129203Scognet                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )  /* <<1 discards the top (sign) bit */
129203Scognet           );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
129203Scognetor equal to the corresponding value `b', and 0 otherwise.  The comparison
129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point
129203ScognetArithmetic.

As coded below:  if either operand is a NaN (exponent field 0x7FFF with a
nonzero fraction) the invalid exception is raised and 0 is returned.  When
the sign bits differ the result is 1 if `a' is the negative operand or if
both operands are zero.  When the sign bits agree the two 128-bit patterns
are compared with `le128', with the operands swapped for negative values.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float128_le( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );  /* raised for any NaN operand */
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  /* all bits except the signs */
129203Scognet                 == 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )  /* both negative: order is reversed */
129203Scognet        : le128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
129203Scognetthe corresponding value `b', and 0 otherwise.  The comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.

As coded below:  if either operand is a NaN (exponent field 0x7FFF with a
nonzero fraction) the invalid exception is raised and 0 is returned.  When
the sign bits differ the result is 1 only if `a' is the negative operand
and the operands are not both zero.  When the sign bits agree the two
128-bit patterns are compared with `lt128', with the operands swapped for
negative values.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float128_lt( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );  /* raised for any NaN operand */
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  /* all bits except the signs */
129203Scognet                 != 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )  /* both negative: order is reversed */
129203Scognet        : lt128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to
129203Scognetthe corresponding value `b', and 0 otherwise.  The invalid exception is
129203Scognetraised if either operand is a NaN.  Otherwise, the comparison is performed
129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.

As coded below:  a NaN operand (exponent field 0x7FFF with a nonzero
fraction) also makes the function return 0.  Otherwise the operands are
equal when both words match, or when both operands are zero regardless of
their sign bits.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float128_eq_signaling( float128 a, float128 b )
129203Scognet{
129203Scognet
129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
129203Scognet       ) {
129203Scognet        float_raise( float_flag_invalid );  /* raised for any NaN operand */
129203Scognet        return 0;
129203Scognet    }
129203Scognet    return
129203Scognet           ( a.low == b.low )
129203Scognet        && (    ( a.high == b.high )
129203Scognet             || (    ( a.low == 0 )
129203Scognet                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )  /* <<1 discards the top (sign) bit */
129203Scognet           );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
129203Scognetor equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
129203Scognetcause an exception.  Otherwise, the comparison is performed according to the
129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic.

As coded below:  if either operand is a NaN (exponent field 0x7FFF with a
nonzero fraction) the result is 0, and the invalid exception is raised only
when `float128_is_signaling_nan' reports one of the operands as signaling.
When the sign bits differ the result is 1 if `a' is the negative operand or
if both operands are zero.  When the sign bits agree the two 128-bit
patterns are compared with `le128', with the operands swapped for negative
values.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float128_le_quiet( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
129203Scognet       ) {
129203Scognet        if (    float128_is_signaling_nan( a )
129203Scognet             || float128_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );  /* signaling NaNs only */
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )  /* all bits except the signs */
129203Scognet                 == 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? le128( b.high, b.low, a.high, a.low )  /* both negative: order is reversed */
129203Scognet        : le128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than
129203Scognetthe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
129203Scognetexception.  Otherwise, the comparison is performed according to the IEC/IEEE
129203ScognetStandard for Binary Floating-Point Arithmetic.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetflag float128_lt_quiet( float128 a, float128 b )
129203Scognet{
129203Scognet    flag aSign, bSign;
129203Scognet
129203Scognet    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
129203Scognet         || (    ( extractFloat128Exp( b ) == 0x7FFF )
129203Scognet              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
129203Scognet       ) {
129203Scognet        if (    float128_is_signaling_nan( a )
129203Scognet             || float128_is_signaling_nan( b ) ) {
129203Scognet            float_raise( float_flag_invalid );
129203Scognet        }
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSign = extractFloat128Sign( a );
129203Scognet    bSign = extractFloat128Sign( b );
129203Scognet    if ( aSign != bSign ) {
129203Scognet        return
129203Scognet               aSign
129203Scognet            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
129203Scognet                 != 0 );
129203Scognet    }
129203Scognet    return
129203Scognet          aSign ? lt128( b.high, b.low, a.high, a.low )
129203Scognet        : lt128( a.high, a.low, b.high, b.low );
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif
129203Scognet
129203Scognet
129203Scognet#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
129203Scognet
129203Scognet/*
129203Scognet * These two routines are not part of the original softfloat distribution.
129203Scognet *
129203Scognet * They are based on the corresponding conversions to integer but return
129203Scognet * unsigned numbers instead since these functions are required by GCC.
129203Scognet *
129203Scognet * Added by Mark Brinicombe <mark@NetBSD.org>	27/09/97
129203Scognet *
129203Scognet * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
129203Scognet */
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the double-precision floating-point value
129203Scognet`a' to the 32-bit unsigned integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
129203Scognet`a' is a NaN, the largest positive integer is returned.  If the conversion
129203Scognetoverflows, the largest integer positive is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetuint32 float64_to_uint32_round_to_zero( float64 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits64 aSig, savedASig;
129203Scognet    uint32 z;
129203Scognet
129203Scognet    aSig = extractFloat64Frac( a );
129203Scognet    aExp = extractFloat64Exp( a );
129203Scognet    aSign = extractFloat64Sign( a );
129203Scognet
129203Scognet    if (aSign) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet    	return(0);
129203Scognet    }
129203Scognet
129203Scognet    if ( 0x41E < aExp ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0xffffffff;
129203Scognet    }
129203Scognet    else if ( aExp < 0x3FF ) {
129203Scognet        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSig |= LIT64( 0x0010000000000000 );
129203Scognet    shiftCount = 0x433 - aExp;
129203Scognet    savedASig = aSig;
129203Scognet    aSig >>= shiftCount;
129203Scognet    z = aSig;
129203Scognet    if ( ( aSig<<shiftCount ) != savedASig ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet/*
129203Scognet-------------------------------------------------------------------------------
129203ScognetReturns the result of converting the single-precision floating-point value
129203Scognet`a' to the 32-bit unsigned integer format.  The conversion is
129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point
129203ScognetArithmetic, except that the conversion is always rounded toward zero.  If
129203Scognet`a' is a NaN, the largest positive integer is returned.  If the conversion
129203Scognetoverflows, the largest positive integer is returned.
129203Scognet-------------------------------------------------------------------------------
129203Scognet*/
129203Scognetuint32 float32_to_uint32_round_to_zero( float32 a )
129203Scognet{
129203Scognet    flag aSign;
129203Scognet    int16 aExp, shiftCount;
129203Scognet    bits32 aSig;
129203Scognet    uint32 z;
129203Scognet
129203Scognet    aSig = extractFloat32Frac( a );
129203Scognet    aExp = extractFloat32Exp( a );
129203Scognet    aSign = extractFloat32Sign( a );
129203Scognet    shiftCount = aExp - 0x9E;
129203Scognet
129203Scognet    if (aSign) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet    	return(0);
129203Scognet    }
129203Scognet    if ( 0 < shiftCount ) {
129203Scognet        float_raise( float_flag_invalid );
129203Scognet        return 0xFFFFFFFF;
129203Scognet    }
129203Scognet    else if ( aExp <= 0x7E ) {
129203Scognet        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
129203Scognet        return 0;
129203Scognet    }
129203Scognet    aSig = ( aSig | 0x800000 )<<8;
129203Scognet    z = aSig>>( - shiftCount );
129203Scognet    if ( aSig<<( shiftCount & 31 ) ) {
129203Scognet        float_exception_flags |= float_flag_inexact;
129203Scognet    }
129203Scognet    return z;
129203Scognet
129203Scognet}
129203Scognet
129203Scognet#endif