1230363Sdas/* $NetBSD: softfloat.c,v 1.8 2011/07/10 04:52:23 matt Exp $ */ 2129203Scognet 3129203Scognet/* 4129203Scognet * This version hacked for use with gcc -msoft-float by bjh21. 5129203Scognet * (Mostly a case of #ifdefing out things GCC doesn't need or provides 6129203Scognet * itself). 7129203Scognet */ 8129203Scognet 9129203Scognet/* 10129203Scognet * Things you may want to define: 11129203Scognet * 12129203Scognet * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 13129203Scognet * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 14129203Scognet * properly renamed. 15129203Scognet */ 16129203Scognet 17129203Scognet/* 18129203Scognet=============================================================================== 19129203Scognet 20129203ScognetThis C source file is part of the SoftFloat IEC/IEEE Floating-point 21129203ScognetArithmetic Package, Release 2a. 22129203Scognet 23129203ScognetWritten by John R. Hauser. This work was made possible in part by the 24129203ScognetInternational Computer Science Institute, located at Suite 600, 1947 Center 25129203ScognetStreet, Berkeley, California 94704. Funding was partially provided by the 26129203ScognetNational Science Foundation under grant MIP-9311980. The original version 27129203Scognetof this code was written as part of a project to build a fixed-point vector 28129203Scognetprocessor in collaboration with the University of California at Berkeley, 29129203Scognetoverseen by Profs. Nelson Morgan and John Wawrzynek. More information 30129203Scognetis available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 31129203Scognetarithmetic/SoftFloat.html'. 32129203Scognet 33129203ScognetTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 34129203Scognethas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 35129203ScognetTIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 36129203ScognetPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 37129203ScognetAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 38129203Scognet 39129203ScognetDerivative works are acceptable, even for commercial purposes, so long as 40129203Scognet(1) they include prominent notice that the work is derivative, and (2) they 41129203Scognetinclude prominent notice akin to these four paragraphs for those parts of 42129203Scognetthis code that are retained. 43129203Scognet 44129203Scognet=============================================================================== 45129203Scognet*/ 46129203Scognet 47129203Scognet#include <sys/cdefs.h> 48129203Scognet__FBSDID("$FreeBSD$"); 49129203Scognet 50129203Scognet#ifdef SOFTFLOAT_FOR_GCC 51129203Scognet#include "softfloat-for-gcc.h" 52129203Scognet#endif 53129203Scognet 54129203Scognet#include "milieu.h" 55129203Scognet#include "softfloat.h" 56129203Scognet 57129203Scognet/* 58129203Scognet * Conversions between floats as stored in memory and floats as 59129203Scognet * SoftFloat uses them 60129203Scognet */ 61129203Scognet#ifndef FLOAT64_DEMANGLE 62129203Scognet#define FLOAT64_DEMANGLE(a) (a) 63129203Scognet#endif 64129203Scognet#ifndef FLOAT64_MANGLE 65129203Scognet#define FLOAT64_MANGLE(a) (a) 66129203Scognet#endif 67129203Scognet 68129203Scognet/* 69129203Scognet------------------------------------------------------------------------------- 70129203ScognetFloating-point rounding mode, extended double-precision rounding precision, 71129203Scognetand exception flags. 72129203Scognet------------------------------------------------------------------------------- 73129203Scognet*/ 74230189Sdasint float_rounding_mode = float_round_nearest_even; 75230189Sdasint float_exception_flags = 0; 76129203Scognet#ifdef FLOATX80 77129203Scognetint8 floatx80_rounding_precision = 80; 78129203Scognet#endif 79129203Scognet 80129203Scognet/* 81129203Scognet------------------------------------------------------------------------------- 82129203ScognetPrimitive arithmetic functions, including multi-word arithmetic, and 83129203Scognetdivision and square root approximations. (Can be specialized to target if 84129203Scognetdesired.) 85129203Scognet------------------------------------------------------------------------------- 86129203Scognet*/ 87129203Scognet#include "softfloat-macros" 88129203Scognet 89129203Scognet/* 90129203Scognet------------------------------------------------------------------------------- 91129203ScognetFunctions and definitions to determine: (1) whether tininess for underflow 92129203Scognetis detected before or after rounding by default, (2) what (if anything) 93129203Scognethappens when exceptions are raised, (3) how signaling NaNs are distinguished 94129203Scognetfrom quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 95129203Scognetare propagated from function inputs to output. These details are target- 96129203Scognetspecific. 97129203Scognet------------------------------------------------------------------------------- 98129203Scognet*/ 99129203Scognet#include "softfloat-specialize" 100129203Scognet 101129203Scognet#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) 102129203Scognet/* 103129203Scognet------------------------------------------------------------------------------- 104129203ScognetTakes a 64-bit fixed-point value `absZ' with binary point between bits 6 105129203Scognetand 7, and returns the properly rounded 32-bit integer corresponding to the 106129203Scognetinput. If `zSign' is 1, the input is negated before being converted to an 107129203Scognetinteger. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input 108129203Scognetis simply rounded to an integer, with the inexact exception raised if the 109129203Scognetinput cannot be represented exactly as an integer. However, if the fixed- 110129203Scognetpoint input is too large, the invalid exception is raised and the largest 111129203Scognetpositive or negative integer is returned. 112129203Scognet------------------------------------------------------------------------------- 113129203Scognet*/ 114129203Scognetstatic int32 roundAndPackInt32( flag zSign, bits64 absZ ) 115129203Scognet{ 116129203Scognet int8 roundingMode; 117129203Scognet flag roundNearestEven; 118129203Scognet int8 roundIncrement, roundBits; 119129203Scognet int32 z; 120129203Scognet 121129203Scognet roundingMode = float_rounding_mode; 122129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 123129203Scognet roundIncrement = 0x40; 124129203Scognet if ( ! roundNearestEven ) { 125129203Scognet if ( roundingMode == float_round_to_zero ) { 126129203Scognet roundIncrement = 0; 127129203Scognet } 128129203Scognet else { 129129203Scognet roundIncrement = 0x7F; 130129203Scognet if ( zSign ) { 131129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 132129203Scognet } 133129203Scognet else { 134129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 135129203Scognet } 136129203Scognet } 137129203Scognet } 138129203Scognet roundBits = absZ & 0x7F; 139129203Scognet absZ = ( absZ + roundIncrement )>>7; 140129203Scognet absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 141129203Scognet z = absZ; 142129203Scognet if ( zSign ) z = - z; 143129203Scognet if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 144129203Scognet float_raise( float_flag_invalid ); 145129203Scognet return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 146129203Scognet } 147129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 148129203Scognet return z; 149129203Scognet 150129203Scognet} 151129203Scognet 152129203Scognet/* 153129203Scognet------------------------------------------------------------------------------- 154129203ScognetTakes the 128-bit fixed-point value formed by concatenating `absZ0' and 155129203Scognet`absZ1', with binary point between bits 63 and 64 (between the input words), 156129203Scognetand returns the properly rounded 64-bit integer corresponding to the input. 157129203ScognetIf `zSign' is 1, the input is negated before being converted to an integer. 158129203ScognetOrdinarily, the fixed-point input is simply rounded to an integer, with 159129203Scognetthe inexact exception raised if the input cannot be represented exactly as 160129203Scognetan integer. However, if the fixed-point input is too large, the invalid 161129203Scognetexception is raised and the largest positive or negative integer is 162129203Scognetreturned. 163129203Scognet------------------------------------------------------------------------------- 164129203Scognet*/ 165129203Scognetstatic int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) 166129203Scognet{ 167129203Scognet int8 roundingMode; 168129203Scognet flag roundNearestEven, increment; 169129203Scognet int64 z; 170129203Scognet 171129203Scognet roundingMode = float_rounding_mode; 172129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 173129203Scognet increment = ( (sbits64) absZ1 < 0 ); 174129203Scognet if ( ! roundNearestEven ) { 175129203Scognet if ( roundingMode == float_round_to_zero ) { 176129203Scognet increment = 0; 177129203Scognet } 178129203Scognet else { 179129203Scognet if ( zSign ) { 180129203Scognet increment = ( roundingMode == float_round_down ) && absZ1; 181129203Scognet } 182129203Scognet else { 183129203Scognet increment = ( roundingMode == float_round_up ) && absZ1; 184129203Scognet } 185129203Scognet } 186129203Scognet } 187129203Scognet if ( increment ) { 188129203Scognet ++absZ0; 189129203Scognet if ( absZ0 == 0 ) goto overflow; 190129203Scognet absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); 191129203Scognet } 192129203Scognet z = absZ0; 193129203Scognet if ( zSign ) z = - z; 194129203Scognet if ( z && ( ( z < 0 ) ^ zSign ) ) { 195129203Scognet overflow: 196129203Scognet float_raise( float_flag_invalid ); 197129203Scognet return 198129203Scognet zSign ? (sbits64) LIT64( 0x8000000000000000 ) 199129203Scognet : LIT64( 0x7FFFFFFFFFFFFFFF ); 200129203Scognet } 201129203Scognet if ( absZ1 ) float_exception_flags |= float_flag_inexact; 202129203Scognet return z; 203129203Scognet 204129203Scognet} 205129203Scognet#endif 206129203Scognet 207129203Scognet/* 208129203Scognet------------------------------------------------------------------------------- 209129203ScognetReturns the fraction bits of the single-precision floating-point value `a'. 210129203Scognet------------------------------------------------------------------------------- 211129203Scognet*/ 212129203ScognetINLINE bits32 extractFloat32Frac( float32 a ) 213129203Scognet{ 214129203Scognet 215129203Scognet return a & 0x007FFFFF; 216129203Scognet 217129203Scognet} 218129203Scognet 219129203Scognet/* 220129203Scognet------------------------------------------------------------------------------- 221129203ScognetReturns the exponent bits of the single-precision floating-point value `a'. 222129203Scognet------------------------------------------------------------------------------- 223129203Scognet*/ 224129203ScognetINLINE int16 extractFloat32Exp( float32 a ) 225129203Scognet{ 226129203Scognet 227129203Scognet return ( a>>23 ) & 0xFF; 228129203Scognet 229129203Scognet} 230129203Scognet 231129203Scognet/* 232129203Scognet------------------------------------------------------------------------------- 233129203ScognetReturns the sign bit of the single-precision floating-point value `a'. 234129203Scognet------------------------------------------------------------------------------- 235129203Scognet*/ 236129203ScognetINLINE flag extractFloat32Sign( float32 a ) 237129203Scognet{ 238129203Scognet 239129203Scognet return a>>31; 240129203Scognet 241129203Scognet} 242129203Scognet 243129203Scognet/* 244129203Scognet------------------------------------------------------------------------------- 245129203ScognetNormalizes the subnormal single-precision floating-point value represented 246129203Scognetby the denormalized significand `aSig'. The normalized exponent and 247129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and 248129203Scognet`zSigPtr', respectively. 249129203Scognet------------------------------------------------------------------------------- 250129203Scognet*/ 251129203Scognetstatic void 252129203Scognet normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 253129203Scognet{ 254129203Scognet int8 shiftCount; 255129203Scognet 256129203Scognet shiftCount = countLeadingZeros32( aSig ) - 8; 257129203Scognet *zSigPtr = aSig<<shiftCount; 258129203Scognet *zExpPtr = 1 - shiftCount; 259129203Scognet 260129203Scognet} 261129203Scognet 262129203Scognet/* 263129203Scognet------------------------------------------------------------------------------- 264129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a 265129203Scognetsingle-precision floating-point value, returning the result. After being 266129203Scognetshifted into the proper positions, the three fields are simply added 267129203Scognettogether to form the result. This means that any integer portion of `zSig' 268129203Scognetwill be added into the exponent. Since a properly normalized significand 269129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less 270129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized 271129203Scognetsignificand. 272129203Scognet------------------------------------------------------------------------------- 273129203Scognet*/ 274129203ScognetINLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 275129203Scognet{ 276129203Scognet 277129203Scognet return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 278129203Scognet 279129203Scognet} 280129203Scognet 281129203Scognet/* 282129203Scognet------------------------------------------------------------------------------- 283129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 284129203Scognetand significand `zSig', and returns the proper single-precision floating- 285129203Scognetpoint value corresponding to the abstract input. Ordinarily, the abstract 286129203Scognetvalue is simply rounded and packed into the single-precision format, with 287129203Scognetthe inexact exception raised if the abstract input cannot be represented 288129203Scognetexactly. However, if the abstract value is too large, the overflow and 289129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 290129203Scognetreturned. If the abstract value is too small, the input value is rounded to 291129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 292129203Scognetthe abstract input cannot be represented exactly as a subnormal single- 293129203Scognetprecision floating-point number. 294129203Scognet The input significand `zSig' has its binary point between bits 30 295129203Scognetand 29, which is 7 bits to the left of the usual location. This shifted 296129203Scognetsignificand must be normalized or smaller. If `zSig' is not normalized, 297129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number, 298129203Scognetand it must not require rounding. In the usual case that `zSig' is 299129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 300129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for 301129203ScognetBinary Floating-Point Arithmetic. 302129203Scognet------------------------------------------------------------------------------- 303129203Scognet*/ 304129203Scognetstatic float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 305129203Scognet{ 306129203Scognet int8 roundingMode; 307129203Scognet flag roundNearestEven; 308129203Scognet int8 roundIncrement, roundBits; 309129203Scognet flag isTiny; 310129203Scognet 311129203Scognet roundingMode = float_rounding_mode; 312129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 313129203Scognet roundIncrement = 0x40; 314129203Scognet if ( ! roundNearestEven ) { 315129203Scognet if ( roundingMode == float_round_to_zero ) { 316129203Scognet roundIncrement = 0; 317129203Scognet } 318129203Scognet else { 319129203Scognet roundIncrement = 0x7F; 320129203Scognet if ( zSign ) { 321129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 322129203Scognet } 323129203Scognet else { 324129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 325129203Scognet } 326129203Scognet } 327129203Scognet } 328129203Scognet roundBits = zSig & 0x7F; 329129203Scognet if ( 0xFD <= (bits16) zExp ) { 330129203Scognet if ( ( 0xFD < zExp ) 331129203Scognet || ( ( zExp == 0xFD ) 332129203Scognet && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 333129203Scognet ) { 334129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 335129203Scognet return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 336129203Scognet } 337129203Scognet if ( zExp < 0 ) { 338129203Scognet isTiny = 339129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 340129203Scognet || ( zExp < -1 ) 341129203Scognet || ( zSig + roundIncrement < 0x80000000 ); 342129203Scognet shift32RightJamming( zSig, - zExp, &zSig ); 343129203Scognet zExp = 0; 344129203Scognet roundBits = zSig & 0x7F; 345129203Scognet if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 346129203Scognet } 347129203Scognet } 348129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 349129203Scognet zSig = ( zSig + roundIncrement )>>7; 350129203Scognet zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 351129203Scognet if ( zSig == 0 ) zExp = 0; 352129203Scognet return packFloat32( zSign, zExp, zSig ); 353129203Scognet 354129203Scognet} 355129203Scognet 356129203Scognet/* 357129203Scognet------------------------------------------------------------------------------- 358129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 359129203Scognetand significand `zSig', and returns the proper single-precision floating- 360129203Scognetpoint value corresponding to the abstract input. This routine is just like 361129203Scognet`roundAndPackFloat32' except that `zSig' does not have to be normalized. 362129203ScognetBit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 363129203Scognetfloating-point exponent. 364129203Scognet------------------------------------------------------------------------------- 365129203Scognet*/ 366129203Scognetstatic float32 367129203Scognet normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 368129203Scognet{ 369129203Scognet int8 shiftCount; 370129203Scognet 371129203Scognet shiftCount = countLeadingZeros32( zSig ) - 1; 372129203Scognet return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 373129203Scognet 374129203Scognet} 375129203Scognet 376129203Scognet/* 377129203Scognet------------------------------------------------------------------------------- 378129203ScognetReturns the fraction bits of the double-precision floating-point value `a'. 379129203Scognet------------------------------------------------------------------------------- 380129203Scognet*/ 381129203ScognetINLINE bits64 extractFloat64Frac( float64 a ) 382129203Scognet{ 383129203Scognet 384129203Scognet return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF ); 385129203Scognet 386129203Scognet} 387129203Scognet 388129203Scognet/* 389129203Scognet------------------------------------------------------------------------------- 390129203ScognetReturns the exponent bits of the double-precision floating-point value `a'. 391129203Scognet------------------------------------------------------------------------------- 392129203Scognet*/ 393129203ScognetINLINE int16 extractFloat64Exp( float64 a ) 394129203Scognet{ 395129203Scognet 396129203Scognet return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF; 397129203Scognet 398129203Scognet} 399129203Scognet 400129203Scognet/* 401129203Scognet------------------------------------------------------------------------------- 402129203ScognetReturns the sign bit of the double-precision floating-point value `a'. 403129203Scognet------------------------------------------------------------------------------- 404129203Scognet*/ 405129203ScognetINLINE flag extractFloat64Sign( float64 a ) 406129203Scognet{ 407129203Scognet 408129203Scognet return FLOAT64_DEMANGLE(a)>>63; 409129203Scognet 410129203Scognet} 411129203Scognet 412129203Scognet/* 413129203Scognet------------------------------------------------------------------------------- 414129203ScognetNormalizes the subnormal double-precision floating-point value represented 415129203Scognetby the denormalized significand `aSig'. The normalized exponent and 416129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and 417129203Scognet`zSigPtr', respectively. 418129203Scognet------------------------------------------------------------------------------- 419129203Scognet*/ 420129203Scognetstatic void 421129203Scognet normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) 422129203Scognet{ 423129203Scognet int8 shiftCount; 424129203Scognet 425129203Scognet shiftCount = countLeadingZeros64( aSig ) - 11; 426129203Scognet *zSigPtr = aSig<<shiftCount; 427129203Scognet *zExpPtr = 1 - shiftCount; 428129203Scognet 429129203Scognet} 430129203Scognet 431129203Scognet/* 432129203Scognet------------------------------------------------------------------------------- 433129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a 434129203Scognetdouble-precision floating-point value, returning the result. After being 435129203Scognetshifted into the proper positions, the three fields are simply added 436129203Scognettogether to form the result. This means that any integer portion of `zSig' 437129203Scognetwill be added into the exponent. Since a properly normalized significand 438129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less 439129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized 440129203Scognetsignificand. 441129203Scognet------------------------------------------------------------------------------- 442129203Scognet*/ 443129203ScognetINLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) 444129203Scognet{ 445129203Scognet 446129203Scognet return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 447129203Scognet ( ( (bits64) zExp )<<52 ) + zSig ); 448129203Scognet 449129203Scognet} 450129203Scognet 451129203Scognet/* 452129203Scognet------------------------------------------------------------------------------- 453129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 454129203Scognetand significand `zSig', and returns the proper double-precision floating- 455129203Scognetpoint value corresponding to the abstract input. Ordinarily, the abstract 456129203Scognetvalue is simply rounded and packed into the double-precision format, with 457129203Scognetthe inexact exception raised if the abstract input cannot be represented 458129203Scognetexactly. However, if the abstract value is too large, the overflow and 459129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 460129203Scognetreturned. If the abstract value is too small, the input value is rounded to 461129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 462129203Scognetthe abstract input cannot be represented exactly as a subnormal double- 463129203Scognetprecision floating-point number. 464129203Scognet The input significand `zSig' has its binary point between bits 62 465129203Scognetand 61, which is 10 bits to the left of the usual location. This shifted 466129203Scognetsignificand must be normalized or smaller. If `zSig' is not normalized, 467129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number, 468129203Scognetand it must not require rounding. In the usual case that `zSig' is 469129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 470129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for 471129203ScognetBinary Floating-Point Arithmetic. 472129203Scognet------------------------------------------------------------------------------- 473129203Scognet*/ 474129203Scognetstatic float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 475129203Scognet{ 476129203Scognet int8 roundingMode; 477129203Scognet flag roundNearestEven; 478129203Scognet int16 roundIncrement, roundBits; 479129203Scognet flag isTiny; 480129203Scognet 481129203Scognet roundingMode = float_rounding_mode; 482129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 483129203Scognet roundIncrement = 0x200; 484129203Scognet if ( ! roundNearestEven ) { 485129203Scognet if ( roundingMode == float_round_to_zero ) { 486129203Scognet roundIncrement = 0; 487129203Scognet } 488129203Scognet else { 489129203Scognet roundIncrement = 0x3FF; 490129203Scognet if ( zSign ) { 491129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 492129203Scognet } 493129203Scognet else { 494129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 495129203Scognet } 496129203Scognet } 497129203Scognet } 498129203Scognet roundBits = zSig & 0x3FF; 499129203Scognet if ( 0x7FD <= (bits16) zExp ) { 500129203Scognet if ( ( 0x7FD < zExp ) 501129203Scognet || ( ( zExp == 0x7FD ) 502129203Scognet && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) 503129203Scognet ) { 504129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 505129203Scognet return FLOAT64_MANGLE( 506129203Scognet FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) - 507129203Scognet ( roundIncrement == 0 )); 508129203Scognet } 509129203Scognet if ( zExp < 0 ) { 510129203Scognet isTiny = 511129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 512129203Scognet || ( zExp < -1 ) 513129203Scognet || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); 514129203Scognet shift64RightJamming( zSig, - zExp, &zSig ); 515129203Scognet zExp = 0; 516129203Scognet roundBits = zSig & 0x3FF; 517129203Scognet if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 518129203Scognet } 519129203Scognet } 520129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 521129203Scognet zSig = ( zSig + roundIncrement )>>10; 522129203Scognet zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 523129203Scognet if ( zSig == 0 ) zExp = 0; 524129203Scognet return packFloat64( zSign, zExp, zSig ); 525129203Scognet 526129203Scognet} 527129203Scognet 528129203Scognet/* 529129203Scognet------------------------------------------------------------------------------- 530129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 531129203Scognetand significand `zSig', and returns the proper double-precision floating- 532129203Scognetpoint value corresponding to the abstract input. This routine is just like 533129203Scognet`roundAndPackFloat64' except that `zSig' does not have to be normalized. 534129203ScognetBit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 535129203Scognetfloating-point exponent. 536129203Scognet------------------------------------------------------------------------------- 537129203Scognet*/ 538129203Scognetstatic float64 539129203Scognet normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 540129203Scognet{ 541129203Scognet int8 shiftCount; 542129203Scognet 543129203Scognet shiftCount = countLeadingZeros64( zSig ) - 1; 544129203Scognet return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); 545129203Scognet 546129203Scognet} 547129203Scognet 548129203Scognet#ifdef FLOATX80 549129203Scognet 550129203Scognet/* 551129203Scognet------------------------------------------------------------------------------- 552129203ScognetReturns the fraction bits of the extended double-precision floating-point 553129203Scognetvalue `a'. 554129203Scognet------------------------------------------------------------------------------- 555129203Scognet*/ 556129203ScognetINLINE bits64 extractFloatx80Frac( floatx80 a ) 557129203Scognet{ 558129203Scognet 559129203Scognet return a.low; 560129203Scognet 561129203Scognet} 562129203Scognet 563129203Scognet/* 564129203Scognet------------------------------------------------------------------------------- 565129203ScognetReturns the exponent bits of the extended double-precision floating-point 566129203Scognetvalue `a'. 567129203Scognet------------------------------------------------------------------------------- 568129203Scognet*/ 569129203ScognetINLINE int32 extractFloatx80Exp( floatx80 a ) 570129203Scognet{ 571129203Scognet 572129203Scognet return a.high & 0x7FFF; 573129203Scognet 574129203Scognet} 575129203Scognet 576129203Scognet/* 577129203Scognet------------------------------------------------------------------------------- 578129203ScognetReturns the sign bit of the extended double-precision floating-point value 579129203Scognet`a'. 580129203Scognet------------------------------------------------------------------------------- 581129203Scognet*/ 582129203ScognetINLINE flag extractFloatx80Sign( floatx80 a ) 583129203Scognet{ 584129203Scognet 585129203Scognet return a.high>>15; 586129203Scognet 587129203Scognet} 588129203Scognet 589129203Scognet/* 590129203Scognet------------------------------------------------------------------------------- 591129203ScognetNormalizes the subnormal extended double-precision floating-point value 592129203Scognetrepresented by the denormalized significand `aSig'. The normalized exponent 593129203Scognetand significand are stored at the locations pointed to by `zExpPtr' and 594129203Scognet`zSigPtr', respectively. 595129203Scognet------------------------------------------------------------------------------- 596129203Scognet*/ 597129203Scognetstatic void 598129203Scognet normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) 599129203Scognet{ 600129203Scognet int8 shiftCount; 601129203Scognet 602129203Scognet shiftCount = countLeadingZeros64( aSig ); 603129203Scognet *zSigPtr = aSig<<shiftCount; 604129203Scognet *zExpPtr = 1 - shiftCount; 605129203Scognet 606129203Scognet} 607129203Scognet 608129203Scognet/* 609129203Scognet------------------------------------------------------------------------------- 610129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into an 611129203Scognetextended double-precision floating-point value, returning the result. 612129203Scognet------------------------------------------------------------------------------- 613129203Scognet*/ 614129203ScognetINLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) 615129203Scognet{ 616129203Scognet floatx80 z; 617129203Scognet 618129203Scognet z.low = zSig; 619129203Scognet z.high = ( ( (bits16) zSign )<<15 ) + zExp; 620129203Scognet return z; 621129203Scognet 622129203Scognet} 623129203Scognet 624129203Scognet/* 625129203Scognet------------------------------------------------------------------------------- 626129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 627129203Scognetand extended significand formed by the concatenation of `zSig0' and `zSig1', 628129203Scognetand returns the proper extended double-precision floating-point value 629129203Scognetcorresponding to the abstract input. Ordinarily, the abstract value is 630129203Scognetrounded and packed into the extended double-precision format, with the 631129203Scognetinexact exception raised if the abstract input cannot be represented 632129203Scognetexactly. However, if the abstract value is too large, the overflow and 633129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 634129203Scognetreturned. If the abstract value is too small, the input value is rounded to 635129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 636129203Scognetthe abstract input cannot be represented exactly as a subnormal extended 637129203Scognetdouble-precision floating-point number. 638129203Scognet If `roundingPrecision' is 32 or 64, the result is rounded to the same 639129203Scognetnumber of bits as single or double precision, respectively. Otherwise, the 640129203Scognetresult is rounded to the full precision of the extended double-precision 641129203Scognetformat. 642129203Scognet The input significand must be normalized or smaller. If the input 643129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result 644129203Scognetreturned is a subnormal number, and it must not require rounding. The 645129203Scognethandling of underflow and overflow follows the IEC/IEEE Standard for Binary 646129203ScognetFloating-Point Arithmetic. 647129203Scognet------------------------------------------------------------------------------- 648129203Scognet*/ 649129203Scognetstatic floatx80 650129203Scognet roundAndPackFloatx80( 651129203Scognet int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 652129203Scognet ) 653129203Scognet{ 654129203Scognet int8 roundingMode; 655129203Scognet flag roundNearestEven, increment, isTiny; 656129203Scognet int64 roundIncrement, roundMask, roundBits; 657129203Scognet 658129203Scognet roundingMode = float_rounding_mode; 659129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 660129203Scognet if ( roundingPrecision == 80 ) goto precision80; 661129203Scognet if ( roundingPrecision == 64 ) { 662129203Scognet roundIncrement = LIT64( 0x0000000000000400 ); 663129203Scognet roundMask = LIT64( 0x00000000000007FF ); 664129203Scognet } 665129203Scognet else if ( roundingPrecision == 32 ) { 666129203Scognet roundIncrement = LIT64( 0x0000008000000000 ); 667129203Scognet roundMask = LIT64( 0x000000FFFFFFFFFF ); 668129203Scognet } 669129203Scognet else { 670129203Scognet goto precision80; 671129203Scognet } 672129203Scognet zSig0 |= ( zSig1 != 0 ); 673129203Scognet if ( ! roundNearestEven ) { 674129203Scognet if ( roundingMode == float_round_to_zero ) { 675129203Scognet roundIncrement = 0; 676129203Scognet } 677129203Scognet else { 678129203Scognet roundIncrement = roundMask; 679129203Scognet if ( zSign ) { 680129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 681129203Scognet } 682129203Scognet else { 683129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 684129203Scognet } 685129203Scognet } 686129203Scognet } 687129203Scognet roundBits = zSig0 & roundMask; 688129203Scognet if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 689129203Scognet if ( ( 0x7FFE < zExp ) 690129203Scognet || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 691129203Scognet ) { 692129203Scognet goto overflow; 693129203Scognet } 694129203Scognet if ( zExp <= 0 ) { 695129203Scognet isTiny = 696129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 697129203Scognet || ( zExp < 0 ) 698129203Scognet || ( zSig0 <= zSig0 + roundIncrement ); 699129203Scognet shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 700129203Scognet zExp = 0; 701129203Scognet roundBits = zSig0 & roundMask; 702129203Scognet if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 703129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 704129203Scognet zSig0 += roundIncrement; 705129203Scognet if ( (sbits64) zSig0 < 0 ) zExp = 1; 706129203Scognet roundIncrement = roundMask + 1; 707129203Scognet if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 708129203Scognet roundMask |= roundIncrement; 709129203Scognet } 710129203Scognet zSig0 &= ~ roundMask; 711129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 712129203Scognet } 713129203Scognet } 714129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 715129203Scognet zSig0 += roundIncrement; 716129203Scognet if ( zSig0 < roundIncrement ) { 717129203Scognet ++zExp; 718129203Scognet zSig0 = LIT64( 0x8000000000000000 ); 719129203Scognet } 720129203Scognet roundIncrement = roundMask + 1; 721129203Scognet if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 722129203Scognet roundMask |= roundIncrement; 723129203Scognet } 724129203Scognet zSig0 &= ~ roundMask; 725129203Scognet if ( zSig0 == 0 ) zExp = 0; 726129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 727129203Scognet precision80: 728129203Scognet increment = ( (sbits64) zSig1 < 0 ); 729129203Scognet if ( ! roundNearestEven ) { 730129203Scognet if ( roundingMode == float_round_to_zero ) { 731129203Scognet increment = 0; 732129203Scognet } 733129203Scognet else { 734129203Scognet if ( zSign ) { 735129203Scognet increment = ( roundingMode == float_round_down ) && zSig1; 736129203Scognet } 737129203Scognet else { 738129203Scognet increment = ( roundingMode == float_round_up ) && zSig1; 739129203Scognet } 740129203Scognet } 741129203Scognet } 742129203Scognet if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 743129203Scognet if ( ( 0x7FFE < zExp ) 744129203Scognet || ( ( zExp == 0x7FFE ) 745129203Scognet && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) 746129203Scognet && increment 747129203Scognet ) 748129203Scognet ) { 749129203Scognet roundMask = 0; 750129203Scognet overflow: 751129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 752129203Scognet if ( ( roundingMode == float_round_to_zero ) 753129203Scognet || ( zSign && ( roundingMode == float_round_up ) ) 754129203Scognet || ( ! zSign && ( roundingMode == float_round_down ) ) 755129203Scognet ) { 756129203Scognet return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 757129203Scognet } 758129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 759129203Scognet } 760129203Scognet if ( zExp <= 0 ) { 761129203Scognet isTiny = 762129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 763129203Scognet || ( zExp < 0 ) 764129203Scognet || ! increment 765129203Scognet || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); 766129203Scognet shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 767129203Scognet zExp = 0; 768129203Scognet if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); 769129203Scognet if ( zSig1 ) float_exception_flags |= float_flag_inexact; 770129203Scognet if ( roundNearestEven ) { 771129203Scognet increment = ( (sbits64) zSig1 < 0 ); 772129203Scognet } 773129203Scognet else { 774129203Scognet if ( zSign ) { 775129203Scognet increment = ( roundingMode == float_round_down ) && zSig1; 776129203Scognet } 777129203Scognet else { 778129203Scognet increment = ( roundingMode == float_round_up ) && zSig1; 779129203Scognet } 780129203Scognet } 781129203Scognet if ( increment ) { 782129203Scognet ++zSig0; 783129203Scognet zSig0 &= 784129203Scognet ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 785129203Scognet if ( (sbits64) zSig0 < 0 ) zExp = 1; 786129203Scognet } 787129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 788129203Scognet } 789129203Scognet } 790129203Scognet if ( zSig1 ) float_exception_flags |= float_flag_inexact; 791129203Scognet if ( increment ) { 792129203Scognet ++zSig0; 793129203Scognet if ( zSig0 == 0 ) { 794129203Scognet ++zExp; 795129203Scognet zSig0 = LIT64( 0x8000000000000000 ); 796129203Scognet } 797129203Scognet else { 798129203Scognet zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 799129203Scognet } 800129203Scognet } 801129203Scognet else { 802129203Scognet if ( zSig0 == 0 ) zExp = 0; 803129203Scognet } 804129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 805129203Scognet 806129203Scognet} 807129203Scognet 808129203Scognet/* 809129203Scognet------------------------------------------------------------------------------- 810129203ScognetTakes an abstract floating-point value having sign `zSign', exponent 811129203Scognet`zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 812129203Scognetand returns the proper extended double-precision floating-point value 813129203Scognetcorresponding to the abstract input. This routine is just like 814129203Scognet`roundAndPackFloatx80' except that the input significand does not have to be 815129203Scognetnormalized. 816129203Scognet------------------------------------------------------------------------------- 817129203Scognet*/ 818129203Scognetstatic floatx80 819129203Scognet normalizeRoundAndPackFloatx80( 820129203Scognet int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 821129203Scognet ) 822129203Scognet{ 823129203Scognet int8 shiftCount; 824129203Scognet 825129203Scognet if ( zSig0 == 0 ) { 826129203Scognet zSig0 = zSig1; 827129203Scognet zSig1 = 0; 828129203Scognet zExp -= 64; 829129203Scognet } 830129203Scognet shiftCount = countLeadingZeros64( zSig0 ); 831129203Scognet shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 832129203Scognet zExp -= shiftCount; 833129203Scognet return 834129203Scognet roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); 835129203Scognet 836129203Scognet} 837129203Scognet 838129203Scognet#endif 839129203Scognet 840129203Scognet#ifdef FLOAT128 841129203Scognet 842129203Scognet/* 843129203Scognet------------------------------------------------------------------------------- 844129203ScognetReturns the least-significant 64 fraction bits of the quadruple-precision 845129203Scognetfloating-point value `a'. 846129203Scognet------------------------------------------------------------------------------- 847129203Scognet*/ 848129203ScognetINLINE bits64 extractFloat128Frac1( float128 a ) 849129203Scognet{ 850129203Scognet 851129203Scognet return a.low; 852129203Scognet 853129203Scognet} 854129203Scognet 855129203Scognet/* 856129203Scognet------------------------------------------------------------------------------- 857129203ScognetReturns the most-significant 48 fraction bits of the quadruple-precision 858129203Scognetfloating-point value `a'. 859129203Scognet------------------------------------------------------------------------------- 860129203Scognet*/ 861129203ScognetINLINE bits64 extractFloat128Frac0( float128 a ) 862129203Scognet{ 863129203Scognet 864129203Scognet return a.high & LIT64( 0x0000FFFFFFFFFFFF ); 865129203Scognet 866129203Scognet} 867129203Scognet 868129203Scognet/* 869129203Scognet------------------------------------------------------------------------------- 870129203ScognetReturns the exponent bits of the quadruple-precision floating-point value 871129203Scognet`a'. 872129203Scognet------------------------------------------------------------------------------- 873129203Scognet*/ 874129203ScognetINLINE int32 extractFloat128Exp( float128 a ) 875129203Scognet{ 876129203Scognet 877129203Scognet return ( a.high>>48 ) & 0x7FFF; 878129203Scognet 879129203Scognet} 880129203Scognet 881129203Scognet/* 882129203Scognet------------------------------------------------------------------------------- 883129203ScognetReturns the sign bit of the quadruple-precision floating-point value `a'. 884129203Scognet------------------------------------------------------------------------------- 885129203Scognet*/ 886129203ScognetINLINE flag extractFloat128Sign( float128 a ) 887129203Scognet{ 888129203Scognet 889129203Scognet return a.high>>63; 890129203Scognet 891129203Scognet} 892129203Scognet 893129203Scognet/* 894129203Scognet------------------------------------------------------------------------------- 895129203ScognetNormalizes the subnormal quadruple-precision floating-point value 896129203Scognetrepresented by the denormalized significand formed by the concatenation of 897129203Scognet`aSig0' and `aSig1'. The normalized exponent is stored at the location 898129203Scognetpointed to by `zExpPtr'. The most significant 49 bits of the normalized 899129203Scognetsignificand are stored at the location pointed to by `zSig0Ptr', and the 900129203Scognetleast significant 64 bits of the normalized significand are stored at the 901129203Scognetlocation pointed to by `zSig1Ptr'. 902129203Scognet------------------------------------------------------------------------------- 903129203Scognet*/ 904129203Scognetstatic void 905129203Scognet normalizeFloat128Subnormal( 906129203Scognet bits64 aSig0, 907129203Scognet bits64 aSig1, 908129203Scognet int32 *zExpPtr, 909129203Scognet bits64 *zSig0Ptr, 910129203Scognet bits64 *zSig1Ptr 911129203Scognet ) 912129203Scognet{ 913129203Scognet int8 shiftCount; 914129203Scognet 915129203Scognet if ( aSig0 == 0 ) { 916129203Scognet shiftCount = countLeadingZeros64( aSig1 ) - 15; 917129203Scognet if ( shiftCount < 0 ) { 918129203Scognet *zSig0Ptr = aSig1>>( - shiftCount ); 919129203Scognet *zSig1Ptr = aSig1<<( shiftCount & 63 ); 920129203Scognet } 921129203Scognet else { 922129203Scognet *zSig0Ptr = aSig1<<shiftCount; 923129203Scognet *zSig1Ptr = 0; 924129203Scognet } 925129203Scognet *zExpPtr = - shiftCount - 63; 926129203Scognet } 927129203Scognet else { 928129203Scognet shiftCount = countLeadingZeros64( aSig0 ) - 15; 929129203Scognet shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 930129203Scognet *zExpPtr = 1 - shiftCount; 931129203Scognet } 932129203Scognet 933129203Scognet} 934129203Scognet 935129203Scognet/* 936129203Scognet------------------------------------------------------------------------------- 937129203ScognetPacks the sign `zSign', the exponent `zExp', and the significand formed 938129203Scognetby the concatenation of `zSig0' and `zSig1' into a quadruple-precision 939129203Scognetfloating-point value, returning the result. After being shifted into the 940129203Scognetproper positions, the three fields `zSign', `zExp', and `zSig0' are simply 941129203Scognetadded together to form the most significant 32 bits of the result. This 942129203Scognetmeans that any integer portion of `zSig0' will be added into the exponent. 943129203ScognetSince a properly normalized significand will have an integer portion equal 944129203Scognetto 1, the `zExp' input should be 1 less than the desired result exponent 945129203Scognetwhenever `zSig0' and `zSig1' concatenated form a complete, normalized 946129203Scognetsignificand. 947129203Scognet------------------------------------------------------------------------------- 948129203Scognet*/ 949129203ScognetINLINE float128 950129203Scognet packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 951129203Scognet{ 952129203Scognet float128 z; 953129203Scognet 954129203Scognet z.low = zSig1; 955129203Scognet z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; 956129203Scognet return z; 957129203Scognet 958129203Scognet} 959129203Scognet 960129203Scognet/* 961129203Scognet------------------------------------------------------------------------------- 962129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 963129203Scognetand extended significand formed by the concatenation of `zSig0', `zSig1', 964129203Scognetand `zSig2', and returns the proper quadruple-precision floating-point value 965129203Scognetcorresponding to the abstract input. Ordinarily, the abstract value is 966129203Scognetsimply rounded and packed into the quadruple-precision format, with the 967129203Scognetinexact exception raised if the abstract input cannot be represented 968129203Scognetexactly. However, if the abstract value is too large, the overflow and 969129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 970129203Scognetreturned. If the abstract value is too small, the input value is rounded to 971129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 972129203Scognetthe abstract input cannot be represented exactly as a subnormal quadruple- 973129203Scognetprecision floating-point number. 974129203Scognet The input significand must be normalized or smaller. If the input 975129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result 976129203Scognetreturned is a subnormal number, and it must not require rounding. In the 977129203Scognetusual case that the input significand is normalized, `zExp' must be 1 less 978129203Scognetthan the ``true'' floating-point exponent. The handling of underflow and 979129203Scognetoverflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 980129203Scognet------------------------------------------------------------------------------- 981129203Scognet*/ 982129203Scognetstatic float128 983129203Scognet roundAndPackFloat128( 984129203Scognet flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) 985129203Scognet{ 986129203Scognet int8 roundingMode; 987129203Scognet flag roundNearestEven, increment, isTiny; 988129203Scognet 989129203Scognet roundingMode = float_rounding_mode; 990129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 991129203Scognet increment = ( (sbits64) zSig2 < 0 ); 992129203Scognet if ( ! roundNearestEven ) { 993129203Scognet if ( roundingMode == float_round_to_zero ) { 994129203Scognet increment = 0; 995129203Scognet } 996129203Scognet else { 997129203Scognet if ( zSign ) { 998129203Scognet increment = ( roundingMode == float_round_down ) && zSig2; 999129203Scognet } 1000129203Scognet else { 1001129203Scognet increment = ( roundingMode == float_round_up ) && zSig2; 1002129203Scognet } 1003129203Scognet } 1004129203Scognet } 1005129203Scognet if ( 0x7FFD <= (bits32) zExp ) { 1006129203Scognet if ( ( 0x7FFD < zExp ) 1007129203Scognet || ( ( zExp == 0x7FFD ) 1008129203Scognet && eq128( 1009129203Scognet LIT64( 0x0001FFFFFFFFFFFF ), 1010129203Scognet LIT64( 0xFFFFFFFFFFFFFFFF ), 1011129203Scognet zSig0, 1012129203Scognet zSig1 1013129203Scognet ) 1014129203Scognet && increment 1015129203Scognet ) 1016129203Scognet ) { 1017129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 1018129203Scognet if ( ( roundingMode == float_round_to_zero ) 1019129203Scognet || ( zSign && ( roundingMode == float_round_up ) ) 1020129203Scognet || ( ! zSign && ( roundingMode == float_round_down ) ) 1021129203Scognet ) { 1022129203Scognet return 1023129203Scognet packFloat128( 1024129203Scognet zSign, 1025129203Scognet 0x7FFE, 1026129203Scognet LIT64( 0x0000FFFFFFFFFFFF ), 1027129203Scognet LIT64( 0xFFFFFFFFFFFFFFFF ) 1028129203Scognet ); 1029129203Scognet } 1030129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 1031129203Scognet } 1032129203Scognet if ( zExp < 0 ) { 1033129203Scognet isTiny = 1034129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 1035129203Scognet || ( zExp < -1 ) 1036129203Scognet || ! increment 1037129203Scognet || lt128( 1038129203Scognet zSig0, 1039129203Scognet zSig1, 1040129203Scognet LIT64( 0x0001FFFFFFFFFFFF ), 1041129203Scognet LIT64( 0xFFFFFFFFFFFFFFFF ) 1042129203Scognet ); 1043129203Scognet shift128ExtraRightJamming( 1044129203Scognet zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 1045129203Scognet zExp = 0; 1046129203Scognet if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 1047129203Scognet if ( roundNearestEven ) { 1048129203Scognet increment = ( (sbits64) zSig2 < 0 ); 1049129203Scognet } 1050129203Scognet else { 1051129203Scognet if ( zSign ) { 1052129203Scognet increment = ( roundingMode == float_round_down ) && zSig2; 1053129203Scognet } 1054129203Scognet else { 1055129203Scognet increment = ( roundingMode == float_round_up ) && zSig2; 1056129203Scognet } 1057129203Scognet } 1058129203Scognet } 1059129203Scognet } 1060129203Scognet if ( zSig2 ) float_exception_flags |= float_flag_inexact; 1061129203Scognet if ( increment ) { 1062129203Scognet add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 1063129203Scognet zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 1064129203Scognet } 1065129203Scognet else { 1066129203Scognet if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 1067129203Scognet } 1068129203Scognet return packFloat128( zSign, zExp, zSig0, zSig1 ); 1069129203Scognet 1070129203Scognet} 1071129203Scognet 1072129203Scognet/* 1073129203Scognet------------------------------------------------------------------------------- 1074129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 1075129203Scognetand significand formed by the concatenation of `zSig0' and `zSig1', and 1076129203Scognetreturns the proper quadruple-precision floating-point value corresponding 1077129203Scognetto the abstract input. This routine is just like `roundAndPackFloat128' 1078129203Scognetexcept that the input significand has fewer bits and does not have to be 1079129203Scognetnormalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 1080129203Scognetpoint exponent. 1081129203Scognet------------------------------------------------------------------------------- 1082129203Scognet*/ 1083129203Scognetstatic float128 1084129203Scognet normalizeRoundAndPackFloat128( 1085129203Scognet flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 1086129203Scognet{ 1087129203Scognet int8 shiftCount; 1088129203Scognet bits64 zSig2; 1089129203Scognet 1090129203Scognet if ( zSig0 == 0 ) { 1091129203Scognet zSig0 = zSig1; 1092129203Scognet zSig1 = 0; 1093129203Scognet zExp -= 64; 1094129203Scognet } 1095129203Scognet shiftCount = countLeadingZeros64( zSig0 ) - 15; 1096129203Scognet if ( 0 <= shiftCount ) { 1097129203Scognet zSig2 = 0; 1098129203Scognet shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1099129203Scognet } 1100129203Scognet else { 1101129203Scognet shift128ExtraRightJamming( 1102129203Scognet zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 1103129203Scognet } 1104129203Scognet zExp -= shiftCount; 1105129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 1106129203Scognet 1107129203Scognet} 1108129203Scognet 1109129203Scognet#endif 1110129203Scognet 1111129203Scognet/* 1112129203Scognet------------------------------------------------------------------------------- 1113129203ScognetReturns the result of converting the 32-bit two's complement integer `a' 1114129203Scognetto the single-precision floating-point format. The conversion is performed 1115129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1116129203Scognet------------------------------------------------------------------------------- 1117129203Scognet*/ 1118129203Scognetfloat32 int32_to_float32( int32 a ) 1119129203Scognet{ 1120129203Scognet flag zSign; 1121129203Scognet 1122129203Scognet if ( a == 0 ) return 0; 1123129203Scognet if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 1124129203Scognet zSign = ( a < 0 ); 1125129203Scognet return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); 1126129203Scognet 1127129203Scognet} 1128129203Scognet 1129230380Sdas#ifndef SOFTFLOAT_FOR_GCC /* __floatunsisf is in libgcc */ 1130230363Sdasfloat32 uint32_to_float32( uint32 a ) 1131230363Sdas{ 1132230363Sdas if ( a == 0 ) return 0; 1133230363Sdas if ( a & (bits32) 0x80000000 ) 1134230363Sdas return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 ); 1135230363Sdas return normalizeRoundAndPackFloat32( 0, 0x9C, a ); 1136230363Sdas} 1137230380Sdas#endif 1138230363Sdas 1139230363Sdas 1140129203Scognet/* 1141129203Scognet------------------------------------------------------------------------------- 1142129203ScognetReturns the result of converting the 32-bit two's complement integer `a' 1143129203Scognetto the double-precision floating-point format. The conversion is performed 1144129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1145129203Scognet------------------------------------------------------------------------------- 1146129203Scognet*/ 1147129203Scognetfloat64 int32_to_float64( int32 a ) 1148129203Scognet{ 1149129203Scognet flag zSign; 1150129203Scognet uint32 absA; 1151129203Scognet int8 shiftCount; 1152129203Scognet bits64 zSig; 1153129203Scognet 1154129203Scognet if ( a == 0 ) return 0; 1155129203Scognet zSign = ( a < 0 ); 1156129203Scognet absA = zSign ? - a : a; 1157129203Scognet shiftCount = countLeadingZeros32( absA ) + 21; 1158129203Scognet zSig = absA; 1159129203Scognet return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount ); 1160129203Scognet 1161129203Scognet} 1162129203Scognet 1163230380Sdas#ifndef SOFTFLOAT_FOR_GCC /* __floatunsidf is in libgcc */ 1164230363Sdasfloat64 uint32_to_float64( uint32 a ) 1165230363Sdas{ 1166230363Sdas int8 shiftCount; 1167230363Sdas bits64 zSig = a; 1168230363Sdas 1169230363Sdas if ( a == 0 ) return 0; 1170230363Sdas shiftCount = countLeadingZeros32( a ) + 21; 1171230363Sdas return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount ); 1172230363Sdas 1173230363Sdas} 1174230380Sdas#endif 1175230363Sdas 1176129203Scognet#ifdef FLOATX80 1177129203Scognet 1178129203Scognet/* 1179129203Scognet------------------------------------------------------------------------------- 1180129203ScognetReturns the result of converting the 32-bit two's complement integer `a' 1181129203Scognetto the extended double-precision floating-point format. The conversion 1182129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 1183129203ScognetArithmetic. 1184129203Scognet------------------------------------------------------------------------------- 1185129203Scognet*/ 1186129203Scognetfloatx80 int32_to_floatx80( int32 a ) 1187129203Scognet{ 1188129203Scognet flag zSign; 1189129203Scognet uint32 absA; 1190129203Scognet int8 shiftCount; 1191129203Scognet bits64 zSig; 1192129203Scognet 1193129203Scognet if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1194129203Scognet zSign = ( a < 0 ); 1195129203Scognet absA = zSign ? - a : a; 1196129203Scognet shiftCount = countLeadingZeros32( absA ) + 32; 1197129203Scognet zSig = absA; 1198129203Scognet return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 1199129203Scognet 1200129203Scognet} 1201129203Scognet 1202230363Sdasfloatx80 uint32_to_floatx80( uint32 a ) 1203230363Sdas{ 1204230363Sdas int8 shiftCount; 1205230363Sdas bits64 zSig = a; 1206230363Sdas 1207230363Sdas if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1208230363Sdas shiftCount = countLeadingZeros32( a ) + 32; 1209230363Sdas return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount ); 1210230363Sdas 1211230363Sdas} 1212230363Sdas 1213129203Scognet#endif 1214129203Scognet 1215129203Scognet#ifdef FLOAT128 1216129203Scognet 1217129203Scognet/* 1218129203Scognet------------------------------------------------------------------------------- 1219129203ScognetReturns the result of converting the 32-bit two's complement integer `a' to 1220129203Scognetthe quadruple-precision floating-point format. The conversion is performed 1221129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1222129203Scognet------------------------------------------------------------------------------- 1223129203Scognet*/ 1224129203Scognetfloat128 int32_to_float128( int32 a ) 1225129203Scognet{ 1226129203Scognet flag zSign; 1227129203Scognet uint32 absA; 1228129203Scognet int8 shiftCount; 1229129203Scognet bits64 zSig0; 1230129203Scognet 1231129203Scognet if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1232129203Scognet zSign = ( a < 0 ); 1233129203Scognet absA = zSign ? - a : a; 1234129203Scognet shiftCount = countLeadingZeros32( absA ) + 17; 1235129203Scognet zSig0 = absA; 1236129203Scognet return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); 1237129203Scognet 1238129203Scognet} 1239129203Scognet 1240230363Sdasfloat128 uint32_to_float128( uint32 a ) 1241230363Sdas{ 1242230363Sdas int8 shiftCount; 1243230363Sdas bits64 zSig0 = a; 1244230363Sdas 1245230363Sdas if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1246230363Sdas shiftCount = countLeadingZeros32( a ) + 17; 1247230363Sdas return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); 1248230363Sdas 1249230363Sdas} 1250230363Sdas 1251129203Scognet#endif 1252129203Scognet 1253129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */ 1254129203Scognet/* 1255129203Scognet------------------------------------------------------------------------------- 1256129203ScognetReturns the result of converting the 64-bit two's complement integer `a' 1257129203Scognetto the single-precision floating-point format. The conversion is performed 1258129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1259129203Scognet------------------------------------------------------------------------------- 1260129203Scognet*/ 1261129203Scognetfloat32 int64_to_float32( int64 a ) 1262129203Scognet{ 1263129203Scognet flag zSign; 1264129203Scognet uint64 absA; 1265129203Scognet int8 shiftCount; 1266129203Scognet 1267129203Scognet if ( a == 0 ) return 0; 1268129203Scognet zSign = ( a < 0 ); 1269129203Scognet absA = zSign ? - a : a; 1270129203Scognet shiftCount = countLeadingZeros64( absA ) - 40; 1271129203Scognet if ( 0 <= shiftCount ) { 1272129203Scognet return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount ); 1273129203Scognet } 1274129203Scognet else { 1275129203Scognet shiftCount += 7; 1276129203Scognet if ( shiftCount < 0 ) { 1277129203Scognet shift64RightJamming( absA, - shiftCount, &absA ); 1278129203Scognet } 1279129203Scognet else { 1280129203Scognet absA <<= shiftCount; 1281129203Scognet } 1282129203Scognet return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA ); 1283129203Scognet } 1284129203Scognet 1285129203Scognet} 1286129203Scognet 1287129203Scognet/* 1288129203Scognet------------------------------------------------------------------------------- 1289129203ScognetReturns the result of converting the 64-bit two's complement integer `a' 1290129203Scognetto the double-precision floating-point format. The conversion is performed 1291129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1292129203Scognet------------------------------------------------------------------------------- 1293129203Scognet*/ 1294129203Scognetfloat64 int64_to_float64( int64 a ) 1295129203Scognet{ 1296129203Scognet flag zSign; 1297129203Scognet 1298129203Scognet if ( a == 0 ) return 0; 1299129203Scognet if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) { 1300129203Scognet return packFloat64( 1, 0x43E, 0 ); 1301129203Scognet } 1302129203Scognet zSign = ( a < 0 ); 1303129203Scognet return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a ); 1304129203Scognet 1305129203Scognet} 1306129203Scognet 1307129203Scognet#ifdef FLOATX80 1308129203Scognet 1309129203Scognet/* 1310129203Scognet------------------------------------------------------------------------------- 1311129203ScognetReturns the result of converting the 64-bit two's complement integer `a' 1312129203Scognetto the extended double-precision floating-point format. The conversion 1313129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 1314129203ScognetArithmetic. 1315129203Scognet------------------------------------------------------------------------------- 1316129203Scognet*/ 1317129203Scognetfloatx80 int64_to_floatx80( int64 a ) 1318129203Scognet{ 1319129203Scognet flag zSign; 1320129203Scognet uint64 absA; 1321129203Scognet int8 shiftCount; 1322129203Scognet 1323129203Scognet if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1324129203Scognet zSign = ( a < 0 ); 1325129203Scognet absA = zSign ? - a : a; 1326129203Scognet shiftCount = countLeadingZeros64( absA ); 1327129203Scognet return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount ); 1328129203Scognet 1329129203Scognet} 1330129203Scognet 1331129203Scognet#endif 1332129203Scognet 1333129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1334129203Scognet 1335129203Scognet#ifdef FLOAT128 1336129203Scognet 1337129203Scognet/* 1338129203Scognet------------------------------------------------------------------------------- 1339129203ScognetReturns the result of converting the 64-bit two's complement integer `a' to 1340129203Scognetthe quadruple-precision floating-point format. The conversion is performed 1341129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1342129203Scognet------------------------------------------------------------------------------- 1343129203Scognet*/ 1344129203Scognetfloat128 int64_to_float128( int64 a ) 1345129203Scognet{ 1346129203Scognet flag zSign; 1347129203Scognet uint64 absA; 1348129203Scognet int8 shiftCount; 1349129203Scognet int32 zExp; 1350129203Scognet bits64 zSig0, zSig1; 1351129203Scognet 1352129203Scognet if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1353129203Scognet zSign = ( a < 0 ); 1354129203Scognet absA = zSign ? - a : a; 1355129203Scognet shiftCount = countLeadingZeros64( absA ) + 49; 1356129203Scognet zExp = 0x406E - shiftCount; 1357129203Scognet if ( 64 <= shiftCount ) { 1358129203Scognet zSig1 = 0; 1359129203Scognet zSig0 = absA; 1360129203Scognet shiftCount -= 64; 1361129203Scognet } 1362129203Scognet else { 1363129203Scognet zSig1 = absA; 1364129203Scognet zSig0 = 0; 1365129203Scognet } 1366129203Scognet shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1367129203Scognet return packFloat128( zSign, zExp, zSig0, zSig1 ); 1368129203Scognet 1369129203Scognet} 1370129203Scognet 1371129203Scognet#endif 1372129203Scognet 1373129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1374129203Scognet/* 1375129203Scognet------------------------------------------------------------------------------- 1376129203ScognetReturns the result of converting the single-precision floating-point value 1377129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 1378129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1379129203ScognetArithmetic---which means in particular that the conversion is rounded 1380129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 1381129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 1382129203Scognetlargest integer with the same sign as `a' is returned. 1383129203Scognet------------------------------------------------------------------------------- 1384129203Scognet*/ 1385129203Scognetint32 float32_to_int32( float32 a ) 1386129203Scognet{ 1387129203Scognet flag aSign; 1388129203Scognet int16 aExp, shiftCount; 1389129203Scognet bits32 aSig; 1390129203Scognet bits64 aSig64; 1391129203Scognet 1392129203Scognet aSig = extractFloat32Frac( a ); 1393129203Scognet aExp = extractFloat32Exp( a ); 1394129203Scognet aSign = extractFloat32Sign( a ); 1395129203Scognet if ( ( aExp == 0xFF ) && aSig ) aSign = 0; 1396129203Scognet if ( aExp ) aSig |= 0x00800000; 1397129203Scognet shiftCount = 0xAF - aExp; 1398129203Scognet aSig64 = aSig; 1399129203Scognet aSig64 <<= 32; 1400129203Scognet if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); 1401129203Scognet return roundAndPackInt32( aSign, aSig64 ); 1402129203Scognet 1403129203Scognet} 1404129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1405129203Scognet 1406129203Scognet/* 1407129203Scognet------------------------------------------------------------------------------- 1408129203ScognetReturns the result of converting the single-precision floating-point value 1409129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 1410129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1411129203ScognetArithmetic, except that the conversion is always rounded toward zero. 1412129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 1413129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 1414129203Scognetreturned. 1415129203Scognet------------------------------------------------------------------------------- 1416129203Scognet*/ 1417129203Scognetint32 float32_to_int32_round_to_zero( float32 a ) 1418129203Scognet{ 1419129203Scognet flag aSign; 1420129203Scognet int16 aExp, shiftCount; 1421129203Scognet bits32 aSig; 1422129203Scognet int32 z; 1423129203Scognet 1424129203Scognet aSig = extractFloat32Frac( a ); 1425129203Scognet aExp = extractFloat32Exp( a ); 1426129203Scognet aSign = extractFloat32Sign( a ); 1427129203Scognet shiftCount = aExp - 0x9E; 1428129203Scognet if ( 0 <= shiftCount ) { 1429129203Scognet if ( a != 0xCF000000 ) { 1430129203Scognet float_raise( float_flag_invalid ); 1431129203Scognet if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 1432129203Scognet } 1433129203Scognet return (sbits32) 0x80000000; 1434129203Scognet } 1435129203Scognet else if ( aExp <= 0x7E ) { 1436129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 1437129203Scognet return 0; 1438129203Scognet } 1439129203Scognet aSig = ( aSig | 0x00800000 )<<8; 1440129203Scognet z = aSig>>( - shiftCount ); 1441129203Scognet if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 1442129203Scognet float_exception_flags |= float_flag_inexact; 1443129203Scognet } 1444129203Scognet if ( aSign ) z = - z; 1445129203Scognet return z; 1446129203Scognet 1447129203Scognet} 1448129203Scognet 1449129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ 1450129203Scognet/* 1451129203Scognet------------------------------------------------------------------------------- 1452129203ScognetReturns the result of converting the single-precision floating-point value 1453129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 1454129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1455129203ScognetArithmetic---which means in particular that the conversion is rounded 1456129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 1457129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 1458129203Scognetlargest integer with the same sign as `a' is returned. 1459129203Scognet------------------------------------------------------------------------------- 1460129203Scognet*/ 1461129203Scognetint64 float32_to_int64( float32 a ) 1462129203Scognet{ 1463129203Scognet flag aSign; 1464129203Scognet int16 aExp, shiftCount; 1465129203Scognet bits32 aSig; 1466129203Scognet bits64 aSig64, aSigExtra; 1467129203Scognet 1468129203Scognet aSig = extractFloat32Frac( a ); 1469129203Scognet aExp = extractFloat32Exp( a ); 1470129203Scognet aSign = extractFloat32Sign( a ); 1471129203Scognet shiftCount = 0xBE - aExp; 1472129203Scognet if ( shiftCount < 0 ) { 1473129203Scognet float_raise( float_flag_invalid ); 1474129203Scognet if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1475129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 1476129203Scognet } 1477129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 1478129203Scognet } 1479129203Scognet if ( aExp ) aSig |= 0x00800000; 1480129203Scognet aSig64 = aSig; 1481129203Scognet aSig64 <<= 40; 1482129203Scognet shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); 1483129203Scognet return roundAndPackInt64( aSign, aSig64, aSigExtra ); 1484129203Scognet 1485129203Scognet} 1486129203Scognet 1487129203Scognet/* 1488129203Scognet------------------------------------------------------------------------------- 1489129203ScognetReturns the result of converting the single-precision floating-point value 1490129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 1491129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1492129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 1493129203Scognet`a' is a NaN, the largest positive integer is returned. Otherwise, if the 1494129203Scognetconversion overflows, the largest integer with the same sign as `a' is 1495129203Scognetreturned. 1496129203Scognet------------------------------------------------------------------------------- 1497129203Scognet*/ 1498129203Scognetint64 float32_to_int64_round_to_zero( float32 a ) 1499129203Scognet{ 1500129203Scognet flag aSign; 1501129203Scognet int16 aExp, shiftCount; 1502129203Scognet bits32 aSig; 1503129203Scognet bits64 aSig64; 1504129203Scognet int64 z; 1505129203Scognet 1506129203Scognet aSig = extractFloat32Frac( a ); 1507129203Scognet aExp = extractFloat32Exp( a ); 1508129203Scognet aSign = extractFloat32Sign( a ); 1509129203Scognet shiftCount = aExp - 0xBE; 1510129203Scognet if ( 0 <= shiftCount ) { 1511129203Scognet if ( a != 0xDF000000 ) { 1512129203Scognet float_raise( float_flag_invalid ); 1513129203Scognet if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1514129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 1515129203Scognet } 1516129203Scognet } 1517129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 1518129203Scognet } 1519129203Scognet else if ( aExp <= 0x7E ) { 1520129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 1521129203Scognet return 0; 1522129203Scognet } 1523129203Scognet aSig64 = aSig | 0x00800000; 1524129203Scognet aSig64 <<= 40; 1525129203Scognet z = aSig64>>( - shiftCount ); 1526129203Scognet if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { 1527129203Scognet float_exception_flags |= float_flag_inexact; 1528129203Scognet } 1529129203Scognet if ( aSign ) z = - z; 1530129203Scognet return z; 1531129203Scognet 1532129203Scognet} 1533129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1534129203Scognet 1535129203Scognet/* 1536129203Scognet------------------------------------------------------------------------------- 1537129203ScognetReturns the result of converting the single-precision floating-point value 1538129203Scognet`a' to the double-precision floating-point format. The conversion is 1539129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1540129203ScognetArithmetic. 1541129203Scognet------------------------------------------------------------------------------- 1542129203Scognet*/ 1543129203Scognetfloat64 float32_to_float64( float32 a ) 1544129203Scognet{ 1545129203Scognet flag aSign; 1546129203Scognet int16 aExp; 1547129203Scognet bits32 aSig; 1548129203Scognet 1549129203Scognet aSig = extractFloat32Frac( a ); 1550129203Scognet aExp = extractFloat32Exp( a ); 1551129203Scognet aSign = extractFloat32Sign( a ); 1552129203Scognet if ( aExp == 0xFF ) { 1553129203Scognet if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 1554129203Scognet return packFloat64( aSign, 0x7FF, 0 ); 1555129203Scognet } 1556129203Scognet if ( aExp == 0 ) { 1557129203Scognet if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); 1558129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1559129203Scognet --aExp; 1560129203Scognet } 1561129203Scognet return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); 1562129203Scognet 1563129203Scognet} 1564129203Scognet 1565129203Scognet#ifdef FLOATX80 1566129203Scognet 1567129203Scognet/* 1568129203Scognet------------------------------------------------------------------------------- 1569129203ScognetReturns the result of converting the single-precision floating-point value 1570129203Scognet`a' to the extended double-precision floating-point format. The conversion 1571129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 1572129203ScognetArithmetic. 1573129203Scognet------------------------------------------------------------------------------- 1574129203Scognet*/ 1575129203Scognetfloatx80 float32_to_floatx80( float32 a ) 1576129203Scognet{ 1577129203Scognet flag aSign; 1578129203Scognet int16 aExp; 1579129203Scognet bits32 aSig; 1580129203Scognet 1581129203Scognet aSig = extractFloat32Frac( a ); 1582129203Scognet aExp = extractFloat32Exp( a ); 1583129203Scognet aSign = extractFloat32Sign( a ); 1584129203Scognet if ( aExp == 0xFF ) { 1585129203Scognet if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); 1586129203Scognet return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 1587129203Scognet } 1588129203Scognet if ( aExp == 0 ) { 1589129203Scognet if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 1590129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1591129203Scognet } 1592129203Scognet aSig |= 0x00800000; 1593129203Scognet return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); 1594129203Scognet 1595129203Scognet} 1596129203Scognet 1597129203Scognet#endif 1598129203Scognet 1599129203Scognet#ifdef FLOAT128 1600129203Scognet 1601129203Scognet/* 1602129203Scognet------------------------------------------------------------------------------- 1603129203ScognetReturns the result of converting the single-precision floating-point value 1604129203Scognet`a' to the double-precision floating-point format. The conversion is 1605129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1606129203ScognetArithmetic. 1607129203Scognet------------------------------------------------------------------------------- 1608129203Scognet*/ 1609129203Scognetfloat128 float32_to_float128( float32 a ) 1610129203Scognet{ 1611129203Scognet flag aSign; 1612129203Scognet int16 aExp; 1613129203Scognet bits32 aSig; 1614129203Scognet 1615129203Scognet aSig = extractFloat32Frac( a ); 1616129203Scognet aExp = extractFloat32Exp( a ); 1617129203Scognet aSign = extractFloat32Sign( a ); 1618129203Scognet if ( aExp == 0xFF ) { 1619129203Scognet if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); 1620129203Scognet return packFloat128( aSign, 0x7FFF, 0, 0 ); 1621129203Scognet } 1622129203Scognet if ( aExp == 0 ) { 1623129203Scognet if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); 1624129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1625129203Scognet --aExp; 1626129203Scognet } 1627129203Scognet return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); 1628129203Scognet 1629129203Scognet} 1630129203Scognet 1631129203Scognet#endif 1632129203Scognet 1633129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1634129203Scognet/* 1635129203Scognet------------------------------------------------------------------------------- 1636129203ScognetRounds the single-precision floating-point value `a' to an integer, and 1637129203Scognetreturns the result as a single-precision floating-point value. The 1638129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary 1639129203ScognetFloating-Point Arithmetic. 1640129203Scognet------------------------------------------------------------------------------- 1641129203Scognet*/ 1642129203Scognetfloat32 float32_round_to_int( float32 a ) 1643129203Scognet{ 1644129203Scognet flag aSign; 1645129203Scognet int16 aExp; 1646129203Scognet bits32 lastBitMask, roundBitsMask; 1647129203Scognet int8 roundingMode; 1648129203Scognet float32 z; 1649129203Scognet 1650129203Scognet aExp = extractFloat32Exp( a ); 1651129203Scognet if ( 0x96 <= aExp ) { 1652129203Scognet if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 1653129203Scognet return propagateFloat32NaN( a, a ); 1654129203Scognet } 1655129203Scognet return a; 1656129203Scognet } 1657129203Scognet if ( aExp <= 0x7E ) { 1658129203Scognet if ( (bits32) ( a<<1 ) == 0 ) return a; 1659129203Scognet float_exception_flags |= float_flag_inexact; 1660129203Scognet aSign = extractFloat32Sign( a ); 1661129203Scognet switch ( float_rounding_mode ) { 1662129203Scognet case float_round_nearest_even: 1663129203Scognet if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 1664129203Scognet return packFloat32( aSign, 0x7F, 0 ); 1665129203Scognet } 1666129203Scognet break; 1667129203Scognet case float_round_to_zero: 1668129203Scognet break; 1669129203Scognet case float_round_down: 1670129203Scognet return aSign ? 0xBF800000 : 0; 1671129203Scognet case float_round_up: 1672129203Scognet return aSign ? 0x80000000 : 0x3F800000; 1673129203Scognet } 1674129203Scognet return packFloat32( aSign, 0, 0 ); 1675129203Scognet } 1676129203Scognet lastBitMask = 1; 1677129203Scognet lastBitMask <<= 0x96 - aExp; 1678129203Scognet roundBitsMask = lastBitMask - 1; 1679129203Scognet z = a; 1680129203Scognet roundingMode = float_rounding_mode; 1681129203Scognet if ( roundingMode == float_round_nearest_even ) { 1682129203Scognet z += lastBitMask>>1; 1683129203Scognet if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 1684129203Scognet } 1685129203Scognet else if ( roundingMode != float_round_to_zero ) { 1686129203Scognet if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 1687129203Scognet z += roundBitsMask; 1688129203Scognet } 1689129203Scognet } 1690129203Scognet z &= ~ roundBitsMask; 1691129203Scognet if ( z != a ) float_exception_flags |= float_flag_inexact; 1692129203Scognet return z; 1693129203Scognet 1694129203Scognet} 1695129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1696129203Scognet 1697129203Scognet/* 1698129203Scognet------------------------------------------------------------------------------- 1699129203ScognetReturns the result of adding the absolute values of the single-precision 1700129203Scognetfloating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1701129203Scognetbefore being returned. `zSign' is ignored if the result is a NaN. 1702129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 1703129203ScognetFloating-Point Arithmetic. 1704129203Scognet------------------------------------------------------------------------------- 1705129203Scognet*/ 1706129203Scognetstatic float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 1707129203Scognet{ 1708129203Scognet int16 aExp, bExp, zExp; 1709129203Scognet bits32 aSig, bSig, zSig; 1710129203Scognet int16 expDiff; 1711129203Scognet 1712129203Scognet aSig = extractFloat32Frac( a ); 1713129203Scognet aExp = extractFloat32Exp( a ); 1714129203Scognet bSig = extractFloat32Frac( b ); 1715129203Scognet bExp = extractFloat32Exp( b ); 1716129203Scognet expDiff = aExp - bExp; 1717129203Scognet aSig <<= 6; 1718129203Scognet bSig <<= 6; 1719129203Scognet if ( 0 < expDiff ) { 1720129203Scognet if ( aExp == 0xFF ) { 1721129203Scognet if ( aSig ) return propagateFloat32NaN( a, b ); 1722129203Scognet return a; 1723129203Scognet } 1724129203Scognet if ( bExp == 0 ) { 1725129203Scognet --expDiff; 1726129203Scognet } 1727129203Scognet else { 1728129203Scognet bSig |= 0x20000000; 1729129203Scognet } 1730129203Scognet shift32RightJamming( bSig, expDiff, &bSig ); 1731129203Scognet zExp = aExp; 1732129203Scognet } 1733129203Scognet else if ( expDiff < 0 ) { 1734129203Scognet if ( bExp == 0xFF ) { 1735129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1736129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1737129203Scognet } 1738129203Scognet if ( aExp == 0 ) { 1739129203Scognet ++expDiff; 1740129203Scognet } 1741129203Scognet else { 1742129203Scognet aSig |= 0x20000000; 1743129203Scognet } 1744129203Scognet shift32RightJamming( aSig, - expDiff, &aSig ); 1745129203Scognet zExp = bExp; 1746129203Scognet } 1747129203Scognet else { 1748129203Scognet if ( aExp == 0xFF ) { 1749129203Scognet if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 1750129203Scognet return a; 1751129203Scognet } 1752129203Scognet if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 1753129203Scognet zSig = 0x40000000 + aSig + bSig; 1754129203Scognet zExp = aExp; 1755129203Scognet goto roundAndPack; 1756129203Scognet } 1757129203Scognet aSig |= 0x20000000; 1758129203Scognet zSig = ( aSig + bSig )<<1; 1759129203Scognet --zExp; 1760129203Scognet if ( (sbits32) zSig < 0 ) { 1761129203Scognet zSig = aSig + bSig; 1762129203Scognet ++zExp; 1763129203Scognet } 1764129203Scognet roundAndPack: 1765129203Scognet return roundAndPackFloat32( zSign, zExp, zSig ); 1766129203Scognet 1767129203Scognet} 1768129203Scognet 1769129203Scognet/* 1770129203Scognet------------------------------------------------------------------------------- 1771129203ScognetReturns the result of subtracting the absolute values of the single- 1772129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the 1773129203Scognetdifference is negated before being returned. `zSign' is ignored if the 1774129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 1775129203ScognetStandard for Binary Floating-Point Arithmetic. 1776129203Scognet------------------------------------------------------------------------------- 1777129203Scognet*/ 1778129203Scognetstatic float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 1779129203Scognet{ 1780129203Scognet int16 aExp, bExp, zExp; 1781129203Scognet bits32 aSig, bSig, zSig; 1782129203Scognet int16 expDiff; 1783129203Scognet 1784129203Scognet aSig = extractFloat32Frac( a ); 1785129203Scognet aExp = extractFloat32Exp( a ); 1786129203Scognet bSig = extractFloat32Frac( b ); 1787129203Scognet bExp = extractFloat32Exp( b ); 1788129203Scognet expDiff = aExp - bExp; 1789129203Scognet aSig <<= 7; 1790129203Scognet bSig <<= 7; 1791129203Scognet if ( 0 < expDiff ) goto aExpBigger; 1792129203Scognet if ( expDiff < 0 ) goto bExpBigger; 1793129203Scognet if ( aExp == 0xFF ) { 1794129203Scognet if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 1795129203Scognet float_raise( float_flag_invalid ); 1796129203Scognet return float32_default_nan; 1797129203Scognet } 1798129203Scognet if ( aExp == 0 ) { 1799129203Scognet aExp = 1; 1800129203Scognet bExp = 1; 1801129203Scognet } 1802129203Scognet if ( bSig < aSig ) goto aBigger; 1803129203Scognet if ( aSig < bSig ) goto bBigger; 1804129203Scognet return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 1805129203Scognet bExpBigger: 1806129203Scognet if ( bExp == 0xFF ) { 1807129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1808129203Scognet return packFloat32( zSign ^ 1, 0xFF, 0 ); 1809129203Scognet } 1810129203Scognet if ( aExp == 0 ) { 1811129203Scognet ++expDiff; 1812129203Scognet } 1813129203Scognet else { 1814129203Scognet aSig |= 0x40000000; 1815129203Scognet } 1816129203Scognet shift32RightJamming( aSig, - expDiff, &aSig ); 1817129203Scognet bSig |= 0x40000000; 1818129203Scognet bBigger: 1819129203Scognet zSig = bSig - aSig; 1820129203Scognet zExp = bExp; 1821129203Scognet zSign ^= 1; 1822129203Scognet goto normalizeRoundAndPack; 1823129203Scognet aExpBigger: 1824129203Scognet if ( aExp == 0xFF ) { 1825129203Scognet if ( aSig ) return propagateFloat32NaN( a, b ); 1826129203Scognet return a; 1827129203Scognet } 1828129203Scognet if ( bExp == 0 ) { 1829129203Scognet --expDiff; 1830129203Scognet } 1831129203Scognet else { 1832129203Scognet bSig |= 0x40000000; 1833129203Scognet } 1834129203Scognet shift32RightJamming( bSig, expDiff, &bSig ); 1835129203Scognet aSig |= 0x40000000; 1836129203Scognet aBigger: 1837129203Scognet zSig = aSig - bSig; 1838129203Scognet zExp = aExp; 1839129203Scognet normalizeRoundAndPack: 1840129203Scognet --zExp; 1841129203Scognet return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 1842129203Scognet 1843129203Scognet} 1844129203Scognet 1845129203Scognet/* 1846129203Scognet------------------------------------------------------------------------------- 1847129203ScognetReturns the result of adding the single-precision floating-point values `a' 1848129203Scognetand `b'. The operation is performed according to the IEC/IEEE Standard for 1849129203ScognetBinary Floating-Point Arithmetic. 1850129203Scognet------------------------------------------------------------------------------- 1851129203Scognet*/ 1852129203Scognetfloat32 float32_add( float32 a, float32 b ) 1853129203Scognet{ 1854129203Scognet flag aSign, bSign; 1855129203Scognet 1856129203Scognet aSign = extractFloat32Sign( a ); 1857129203Scognet bSign = extractFloat32Sign( b ); 1858129203Scognet if ( aSign == bSign ) { 1859129203Scognet return addFloat32Sigs( a, b, aSign ); 1860129203Scognet } 1861129203Scognet else { 1862129203Scognet return subFloat32Sigs( a, b, aSign ); 1863129203Scognet } 1864129203Scognet 1865129203Scognet} 1866129203Scognet 1867129203Scognet/* 1868129203Scognet------------------------------------------------------------------------------- 1869129203ScognetReturns the result of subtracting the single-precision floating-point values 1870129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 1871129203Scognetfor Binary Floating-Point Arithmetic. 1872129203Scognet------------------------------------------------------------------------------- 1873129203Scognet*/ 1874129203Scognetfloat32 float32_sub( float32 a, float32 b ) 1875129203Scognet{ 1876129203Scognet flag aSign, bSign; 1877129203Scognet 1878129203Scognet aSign = extractFloat32Sign( a ); 1879129203Scognet bSign = extractFloat32Sign( b ); 1880129203Scognet if ( aSign == bSign ) { 1881129203Scognet return subFloat32Sigs( a, b, aSign ); 1882129203Scognet } 1883129203Scognet else { 1884129203Scognet return addFloat32Sigs( a, b, aSign ); 1885129203Scognet } 1886129203Scognet 1887129203Scognet} 1888129203Scognet 1889129203Scognet/* 1890129203Scognet------------------------------------------------------------------------------- 1891129203ScognetReturns the result of multiplying the single-precision floating-point values 1892129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 1893129203Scognetfor Binary Floating-Point Arithmetic. 1894129203Scognet------------------------------------------------------------------------------- 1895129203Scognet*/ 1896129203Scognetfloat32 float32_mul( float32 a, float32 b ) 1897129203Scognet{ 1898129203Scognet flag aSign, bSign, zSign; 1899129203Scognet int16 aExp, bExp, zExp; 1900129203Scognet bits32 aSig, bSig; 1901129203Scognet bits64 zSig64; 1902129203Scognet bits32 zSig; 1903129203Scognet 1904129203Scognet aSig = extractFloat32Frac( a ); 1905129203Scognet aExp = extractFloat32Exp( a ); 1906129203Scognet aSign = extractFloat32Sign( a ); 1907129203Scognet bSig = extractFloat32Frac( b ); 1908129203Scognet bExp = extractFloat32Exp( b ); 1909129203Scognet bSign = extractFloat32Sign( b ); 1910129203Scognet zSign = aSign ^ bSign; 1911129203Scognet if ( aExp == 0xFF ) { 1912129203Scognet if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1913129203Scognet return propagateFloat32NaN( a, b ); 1914129203Scognet } 1915129203Scognet if ( ( bExp | bSig ) == 0 ) { 1916129203Scognet float_raise( float_flag_invalid ); 1917129203Scognet return float32_default_nan; 1918129203Scognet } 1919129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1920129203Scognet } 1921129203Scognet if ( bExp == 0xFF ) { 1922129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1923129203Scognet if ( ( aExp | aSig ) == 0 ) { 1924129203Scognet float_raise( float_flag_invalid ); 1925129203Scognet return float32_default_nan; 1926129203Scognet } 1927129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1928129203Scognet } 1929129203Scognet if ( aExp == 0 ) { 1930129203Scognet if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1931129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1932129203Scognet } 1933129203Scognet if ( bExp == 0 ) { 1934129203Scognet if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1935129203Scognet normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1936129203Scognet } 1937129203Scognet zExp = aExp + bExp - 0x7F; 1938129203Scognet aSig = ( aSig | 0x00800000 )<<7; 1939129203Scognet bSig = ( bSig | 0x00800000 )<<8; 1940129203Scognet shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); 1941129203Scognet zSig = zSig64; 1942129203Scognet if ( 0 <= (sbits32) ( zSig<<1 ) ) { 1943129203Scognet zSig <<= 1; 1944129203Scognet --zExp; 1945129203Scognet } 1946129203Scognet return roundAndPackFloat32( zSign, zExp, zSig ); 1947129203Scognet 1948129203Scognet} 1949129203Scognet 1950129203Scognet/* 1951129203Scognet------------------------------------------------------------------------------- 1952129203ScognetReturns the result of dividing the single-precision floating-point value `a' 1953129203Scognetby the corresponding value `b'. The operation is performed according to the 1954129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 1955129203Scognet------------------------------------------------------------------------------- 1956129203Scognet*/ 1957129203Scognetfloat32 float32_div( float32 a, float32 b ) 1958129203Scognet{ 1959129203Scognet flag aSign, bSign, zSign; 1960129203Scognet int16 aExp, bExp, zExp; 1961129203Scognet bits32 aSig, bSig, zSig; 1962129203Scognet 1963129203Scognet aSig = extractFloat32Frac( a ); 1964129203Scognet aExp = extractFloat32Exp( a ); 1965129203Scognet aSign = extractFloat32Sign( a ); 1966129203Scognet bSig = extractFloat32Frac( b ); 1967129203Scognet bExp = extractFloat32Exp( b ); 1968129203Scognet bSign = extractFloat32Sign( b ); 1969129203Scognet zSign = aSign ^ bSign; 1970129203Scognet if ( aExp == 0xFF ) { 1971129203Scognet if ( aSig ) return propagateFloat32NaN( a, b ); 1972129203Scognet if ( bExp == 0xFF ) { 1973129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1974129203Scognet float_raise( float_flag_invalid ); 1975129203Scognet return float32_default_nan; 1976129203Scognet } 1977129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1978129203Scognet } 1979129203Scognet if ( bExp == 0xFF ) { 1980129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1981129203Scognet return packFloat32( zSign, 0, 0 ); 1982129203Scognet } 1983129203Scognet if ( bExp == 0 ) { 1984129203Scognet if ( bSig == 0 ) { 1985129203Scognet if ( ( aExp | aSig ) == 0 ) { 1986129203Scognet float_raise( float_flag_invalid ); 1987129203Scognet return float32_default_nan; 1988129203Scognet } 1989129203Scognet float_raise( float_flag_divbyzero ); 1990129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1991129203Scognet } 1992129203Scognet normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1993129203Scognet } 1994129203Scognet if ( aExp == 0 ) { 1995129203Scognet if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1996129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1997129203Scognet } 1998129203Scognet zExp = aExp - bExp + 0x7D; 1999129203Scognet aSig = ( aSig | 0x00800000 )<<7; 2000129203Scognet bSig = ( bSig | 0x00800000 )<<8; 2001129203Scognet if ( bSig <= ( aSig + aSig ) ) { 2002129203Scognet aSig >>= 1; 2003129203Scognet ++zExp; 2004129203Scognet } 2005129203Scognet zSig = ( ( (bits64) aSig )<<32 ) / bSig; 2006129203Scognet if ( ( zSig & 0x3F ) == 0 ) { 2007129203Scognet zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 ); 2008129203Scognet } 2009129203Scognet return roundAndPackFloat32( zSign, zExp, zSig ); 2010129203Scognet 2011129203Scognet} 2012129203Scognet 2013129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2014129203Scognet/* 2015129203Scognet------------------------------------------------------------------------------- 2016129203ScognetReturns the remainder of the single-precision floating-point value `a' 2017129203Scognetwith respect to the corresponding value `b'. The operation is performed 2018129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2019129203Scognet------------------------------------------------------------------------------- 2020129203Scognet*/ 2021129203Scognetfloat32 float32_rem( float32 a, float32 b ) 2022129203Scognet{ 2023129203Scognet flag aSign, bSign, zSign; 2024129203Scognet int16 aExp, bExp, expDiff; 2025129203Scognet bits32 aSig, bSig; 2026129203Scognet bits32 q; 2027129203Scognet bits64 aSig64, bSig64, q64; 2028129203Scognet bits32 alternateASig; 2029129203Scognet sbits32 sigMean; 2030129203Scognet 2031129203Scognet aSig = extractFloat32Frac( a ); 2032129203Scognet aExp = extractFloat32Exp( a ); 2033129203Scognet aSign = extractFloat32Sign( a ); 2034129203Scognet bSig = extractFloat32Frac( b ); 2035129203Scognet bExp = extractFloat32Exp( b ); 2036129203Scognet bSign = extractFloat32Sign( b ); 2037129203Scognet if ( aExp == 0xFF ) { 2038129203Scognet if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 2039129203Scognet return propagateFloat32NaN( a, b ); 2040129203Scognet } 2041129203Scognet float_raise( float_flag_invalid ); 2042129203Scognet return float32_default_nan; 2043129203Scognet } 2044129203Scognet if ( bExp == 0xFF ) { 2045129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 2046129203Scognet return a; 2047129203Scognet } 2048129203Scognet if ( bExp == 0 ) { 2049129203Scognet if ( bSig == 0 ) { 2050129203Scognet float_raise( float_flag_invalid ); 2051129203Scognet return float32_default_nan; 2052129203Scognet } 2053129203Scognet normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 2054129203Scognet } 2055129203Scognet if ( aExp == 0 ) { 2056129203Scognet if ( aSig == 0 ) return a; 2057129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 2058129203Scognet } 2059129203Scognet expDiff = aExp - bExp; 2060129203Scognet aSig |= 0x00800000; 2061129203Scognet bSig |= 0x00800000; 2062129203Scognet if ( expDiff < 32 ) { 2063129203Scognet aSig <<= 8; 2064129203Scognet bSig <<= 8; 2065129203Scognet if ( expDiff < 0 ) { 2066129203Scognet if ( expDiff < -1 ) return a; 2067129203Scognet aSig >>= 1; 2068129203Scognet } 2069129203Scognet q = ( bSig <= aSig ); 2070129203Scognet if ( q ) aSig -= bSig; 2071129203Scognet if ( 0 < expDiff ) { 2072129203Scognet q = ( ( (bits64) aSig )<<32 ) / bSig; 2073129203Scognet q >>= 32 - expDiff; 2074129203Scognet bSig >>= 2; 2075129203Scognet aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 2076129203Scognet } 2077129203Scognet else { 2078129203Scognet aSig >>= 2; 2079129203Scognet bSig >>= 2; 2080129203Scognet } 2081129203Scognet } 2082129203Scognet else { 2083129203Scognet if ( bSig <= aSig ) aSig -= bSig; 2084129203Scognet aSig64 = ( (bits64) aSig )<<40; 2085129203Scognet bSig64 = ( (bits64) bSig )<<40; 2086129203Scognet expDiff -= 64; 2087129203Scognet while ( 0 < expDiff ) { 2088129203Scognet q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 2089129203Scognet q64 = ( 2 < q64 ) ? q64 - 2 : 0; 2090129203Scognet aSig64 = - ( ( bSig * q64 )<<38 ); 2091129203Scognet expDiff -= 62; 2092129203Scognet } 2093129203Scognet expDiff += 64; 2094129203Scognet q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 2095129203Scognet q64 = ( 2 < q64 ) ? q64 - 2 : 0; 2096129203Scognet q = q64>>( 64 - expDiff ); 2097129203Scognet bSig <<= 6; 2098129203Scognet aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; 2099129203Scognet } 2100129203Scognet do { 2101129203Scognet alternateASig = aSig; 2102129203Scognet ++q; 2103129203Scognet aSig -= bSig; 2104129203Scognet } while ( 0 <= (sbits32) aSig ); 2105129203Scognet sigMean = aSig + alternateASig; 2106129203Scognet if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 2107129203Scognet aSig = alternateASig; 2108129203Scognet } 2109129203Scognet zSign = ( (sbits32) aSig < 0 ); 2110129203Scognet if ( zSign ) aSig = - aSig; 2111129203Scognet return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 2112129203Scognet 2113129203Scognet} 2114129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2115129203Scognet 2116129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2117129203Scognet/* 2118129203Scognet------------------------------------------------------------------------------- 2119129203ScognetReturns the square root of the single-precision floating-point value `a'. 2120129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary 2121129203ScognetFloating-Point Arithmetic. 2122129203Scognet------------------------------------------------------------------------------- 2123129203Scognet*/ 2124129203Scognetfloat32 float32_sqrt( float32 a ) 2125129203Scognet{ 2126129203Scognet flag aSign; 2127129203Scognet int16 aExp, zExp; 2128129203Scognet bits32 aSig, zSig; 2129129203Scognet bits64 rem, term; 2130129203Scognet 2131129203Scognet aSig = extractFloat32Frac( a ); 2132129203Scognet aExp = extractFloat32Exp( a ); 2133129203Scognet aSign = extractFloat32Sign( a ); 2134129203Scognet if ( aExp == 0xFF ) { 2135129203Scognet if ( aSig ) return propagateFloat32NaN( a, 0 ); 2136129203Scognet if ( ! aSign ) return a; 2137129203Scognet float_raise( float_flag_invalid ); 2138129203Scognet return float32_default_nan; 2139129203Scognet } 2140129203Scognet if ( aSign ) { 2141129203Scognet if ( ( aExp | aSig ) == 0 ) return a; 2142129203Scognet float_raise( float_flag_invalid ); 2143129203Scognet return float32_default_nan; 2144129203Scognet } 2145129203Scognet if ( aExp == 0 ) { 2146129203Scognet if ( aSig == 0 ) return 0; 2147129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 2148129203Scognet } 2149129203Scognet zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 2150129203Scognet aSig = ( aSig | 0x00800000 )<<8; 2151129203Scognet zSig = estimateSqrt32( aExp, aSig ) + 2; 2152129203Scognet if ( ( zSig & 0x7F ) <= 5 ) { 2153129203Scognet if ( zSig < 2 ) { 2154129203Scognet zSig = 0x7FFFFFFF; 2155129203Scognet goto roundAndPack; 2156129203Scognet } 2157129203Scognet aSig >>= aExp & 1; 2158129203Scognet term = ( (bits64) zSig ) * zSig; 2159129203Scognet rem = ( ( (bits64) aSig )<<32 ) - term; 2160129203Scognet while ( (sbits64) rem < 0 ) { 2161129203Scognet --zSig; 2162129203Scognet rem += ( ( (bits64) zSig )<<1 ) | 1; 2163129203Scognet } 2164129203Scognet zSig |= ( rem != 0 ); 2165129203Scognet } 2166129203Scognet shift32RightJamming( zSig, 1, &zSig ); 2167129203Scognet roundAndPack: 2168129203Scognet return roundAndPackFloat32( 0, zExp, zSig ); 2169129203Scognet 2170129203Scognet} 2171129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2172129203Scognet 2173129203Scognet/* 2174129203Scognet------------------------------------------------------------------------------- 2175129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to 2176129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 2177129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2178129203Scognet------------------------------------------------------------------------------- 2179129203Scognet*/ 2180129203Scognetflag float32_eq( float32 a, float32 b ) 2181129203Scognet{ 2182129203Scognet 2183129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2184129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2185129203Scognet ) { 2186129203Scognet if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2187129203Scognet float_raise( float_flag_invalid ); 2188129203Scognet } 2189129203Scognet return 0; 2190129203Scognet } 2191129203Scognet return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2192129203Scognet 2193129203Scognet} 2194129203Scognet 2195129203Scognet/* 2196129203Scognet------------------------------------------------------------------------------- 2197129203ScognetReturns 1 if the single-precision floating-point value `a' is less than 2198129203Scognetor equal to the corresponding value `b', and 0 otherwise. The comparison 2199129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 2200129203ScognetArithmetic. 2201129203Scognet------------------------------------------------------------------------------- 2202129203Scognet*/ 2203129203Scognetflag float32_le( float32 a, float32 b ) 2204129203Scognet{ 2205129203Scognet flag aSign, bSign; 2206129203Scognet 2207129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2208129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2209129203Scognet ) { 2210129203Scognet float_raise( float_flag_invalid ); 2211129203Scognet return 0; 2212129203Scognet } 2213129203Scognet aSign = extractFloat32Sign( a ); 2214129203Scognet bSign = extractFloat32Sign( b ); 2215129203Scognet if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2216129203Scognet return ( a == b ) || ( aSign ^ ( a < b ) ); 2217129203Scognet 2218129203Scognet} 2219129203Scognet 2220129203Scognet/* 2221129203Scognet------------------------------------------------------------------------------- 2222129203ScognetReturns 1 if the single-precision floating-point value `a' is less than 2223129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 2224129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2225129203Scognet------------------------------------------------------------------------------- 2226129203Scognet*/ 2227129203Scognetflag float32_lt( float32 a, float32 b ) 2228129203Scognet{ 2229129203Scognet flag aSign, bSign; 2230129203Scognet 2231129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2232129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2233129203Scognet ) { 2234129203Scognet float_raise( float_flag_invalid ); 2235129203Scognet return 0; 2236129203Scognet } 2237129203Scognet aSign = extractFloat32Sign( a ); 2238129203Scognet bSign = extractFloat32Sign( b ); 2239129203Scognet if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 2240129203Scognet return ( a != b ) && ( aSign ^ ( a < b ) ); 2241129203Scognet 2242129203Scognet} 2243129203Scognet 2244129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2245129203Scognet/* 2246129203Scognet------------------------------------------------------------------------------- 2247129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to 2248129203Scognetthe corresponding value `b', and 0 otherwise. The invalid exception is 2249129203Scognetraised if either operand is a NaN. Otherwise, the comparison is performed 2250129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2251129203Scognet------------------------------------------------------------------------------- 2252129203Scognet*/ 2253129203Scognetflag float32_eq_signaling( float32 a, float32 b ) 2254129203Scognet{ 2255129203Scognet 2256129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2257129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2258129203Scognet ) { 2259129203Scognet float_raise( float_flag_invalid ); 2260129203Scognet return 0; 2261129203Scognet } 2262129203Scognet return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2263129203Scognet 2264129203Scognet} 2265129203Scognet 2266129203Scognet/* 2267129203Scognet------------------------------------------------------------------------------- 2268129203ScognetReturns 1 if the single-precision floating-point value `a' is less than or 2269129203Scognetequal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2270129203Scognetcause an exception. Otherwise, the comparison is performed according to the 2271129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 2272129203Scognet------------------------------------------------------------------------------- 2273129203Scognet*/ 2274129203Scognetflag float32_le_quiet( float32 a, float32 b ) 2275129203Scognet{ 2276129203Scognet flag aSign, bSign; 2277129203Scognet 2278129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2279129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2280129203Scognet ) { 2281129203Scognet if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2282129203Scognet float_raise( float_flag_invalid ); 2283129203Scognet } 2284129203Scognet return 0; 2285129203Scognet } 2286129203Scognet aSign = extractFloat32Sign( a ); 2287129203Scognet bSign = extractFloat32Sign( b ); 2288129203Scognet if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2289129203Scognet return ( a == b ) || ( aSign ^ ( a < b ) ); 2290129203Scognet 2291129203Scognet} 2292129203Scognet 2293129203Scognet/* 2294129203Scognet------------------------------------------------------------------------------- 2295129203ScognetReturns 1 if the single-precision floating-point value `a' is less than 2296129203Scognetthe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2297129203Scognetexception. Otherwise, the comparison is performed according to the IEC/IEEE 2298129203ScognetStandard for Binary Floating-Point Arithmetic. 2299129203Scognet------------------------------------------------------------------------------- 2300129203Scognet*/ 2301129203Scognetflag float32_lt_quiet( float32 a, float32 b ) 2302129203Scognet{ 2303129203Scognet flag aSign, bSign; 2304129203Scognet 2305129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2306129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2307129203Scognet ) { 2308129203Scognet if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2309129203Scognet float_raise( float_flag_invalid ); 2310129203Scognet } 2311129203Scognet return 0; 2312129203Scognet } 2313129203Scognet aSign = extractFloat32Sign( a ); 2314129203Scognet bSign = extractFloat32Sign( b ); 2315129203Scognet if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 2316129203Scognet return ( a != b ) && ( aSign ^ ( a < b ) ); 2317129203Scognet 2318129203Scognet} 2319129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2320129203Scognet 2321129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2322129203Scognet/* 2323129203Scognet------------------------------------------------------------------------------- 2324129203ScognetReturns the result of converting the double-precision floating-point value 2325129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 2326129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2327129203ScognetArithmetic---which means in particular that the conversion is rounded 2328129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 2329129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 2330129203Scognetlargest integer with the same sign as `a' is returned. 2331129203Scognet------------------------------------------------------------------------------- 2332129203Scognet*/ 2333129203Scognetint32 float64_to_int32( float64 a ) 2334129203Scognet{ 2335129203Scognet flag aSign; 2336129203Scognet int16 aExp, shiftCount; 2337129203Scognet bits64 aSig; 2338129203Scognet 2339129203Scognet aSig = extractFloat64Frac( a ); 2340129203Scognet aExp = extractFloat64Exp( a ); 2341129203Scognet aSign = extractFloat64Sign( a ); 2342129203Scognet if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 2343129203Scognet if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2344129203Scognet shiftCount = 0x42C - aExp; 2345129203Scognet if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); 2346129203Scognet return roundAndPackInt32( aSign, aSig ); 2347129203Scognet 2348129203Scognet} 2349129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2350129203Scognet 2351129203Scognet/* 2352129203Scognet------------------------------------------------------------------------------- 2353129203ScognetReturns the result of converting the double-precision floating-point value 2354129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 2355129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2356129203ScognetArithmetic, except that the conversion is always rounded toward zero. 2357129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 2358129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 2359129203Scognetreturned. 2360129203Scognet------------------------------------------------------------------------------- 2361129203Scognet*/ 2362129203Scognetint32 float64_to_int32_round_to_zero( float64 a ) 2363129203Scognet{ 2364129203Scognet flag aSign; 2365129203Scognet int16 aExp, shiftCount; 2366129203Scognet bits64 aSig, savedASig; 2367129203Scognet int32 z; 2368129203Scognet 2369129203Scognet aSig = extractFloat64Frac( a ); 2370129203Scognet aExp = extractFloat64Exp( a ); 2371129203Scognet aSign = extractFloat64Sign( a ); 2372129203Scognet if ( 0x41E < aExp ) { 2373129203Scognet if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 2374129203Scognet goto invalid; 2375129203Scognet } 2376129203Scognet else if ( aExp < 0x3FF ) { 2377129203Scognet if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 2378129203Scognet return 0; 2379129203Scognet } 2380129203Scognet aSig |= LIT64( 0x0010000000000000 ); 2381129203Scognet shiftCount = 0x433 - aExp; 2382129203Scognet savedASig = aSig; 2383129203Scognet aSig >>= shiftCount; 2384129203Scognet z = aSig; 2385129203Scognet if ( aSign ) z = - z; 2386129203Scognet if ( ( z < 0 ) ^ aSign ) { 2387129203Scognet invalid: 2388129203Scognet float_raise( float_flag_invalid ); 2389129203Scognet return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 2390129203Scognet } 2391129203Scognet if ( ( aSig<<shiftCount ) != savedASig ) { 2392129203Scognet float_exception_flags |= float_flag_inexact; 2393129203Scognet } 2394129203Scognet return z; 2395129203Scognet 2396129203Scognet} 2397129203Scognet 2398129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2399129203Scognet/* 2400129203Scognet------------------------------------------------------------------------------- 2401129203ScognetReturns the result of converting the double-precision floating-point value 2402129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 2403129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2404129203ScognetArithmetic---which means in particular that the conversion is rounded 2405129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 2406129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 2407129203Scognetlargest integer with the same sign as `a' is returned. 2408129203Scognet------------------------------------------------------------------------------- 2409129203Scognet*/ 2410129203Scognetint64 float64_to_int64( float64 a ) 2411129203Scognet{ 2412129203Scognet flag aSign; 2413129203Scognet int16 aExp, shiftCount; 2414129203Scognet bits64 aSig, aSigExtra; 2415129203Scognet 2416129203Scognet aSig = extractFloat64Frac( a ); 2417129203Scognet aExp = extractFloat64Exp( a ); 2418129203Scognet aSign = extractFloat64Sign( a ); 2419129203Scognet if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2420129203Scognet shiftCount = 0x433 - aExp; 2421129203Scognet if ( shiftCount <= 0 ) { 2422129203Scognet if ( 0x43E < aExp ) { 2423129203Scognet float_raise( float_flag_invalid ); 2424129203Scognet if ( ! aSign 2425129203Scognet || ( ( aExp == 0x7FF ) 2426129203Scognet && ( aSig != LIT64( 0x0010000000000000 ) ) ) 2427129203Scognet ) { 2428129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 2429129203Scognet } 2430129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 2431129203Scognet } 2432129203Scognet aSigExtra = 0; 2433129203Scognet aSig <<= - shiftCount; 2434129203Scognet } 2435129203Scognet else { 2436129203Scognet shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 2437129203Scognet } 2438129203Scognet return roundAndPackInt64( aSign, aSig, aSigExtra ); 2439129203Scognet 2440129203Scognet} 2441129203Scognet 2442129203Scognet/* 2443129203Scognet------------------------------------------------------------------------------- 2444129203ScognetReturns the result of converting the double-precision floating-point value 2445129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 2446129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2447129203ScognetArithmetic, except that the conversion is always rounded toward zero. 2448129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 2449129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 2450129203Scognetreturned. 2451129203Scognet------------------------------------------------------------------------------- 2452129203Scognet*/ 2453129203Scognetint64 float64_to_int64_round_to_zero( float64 a ) 2454129203Scognet{ 2455129203Scognet flag aSign; 2456129203Scognet int16 aExp, shiftCount; 2457129203Scognet bits64 aSig; 2458129203Scognet int64 z; 2459129203Scognet 2460129203Scognet aSig = extractFloat64Frac( a ); 2461129203Scognet aExp = extractFloat64Exp( a ); 2462129203Scognet aSign = extractFloat64Sign( a ); 2463129203Scognet if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2464129203Scognet shiftCount = aExp - 0x433; 2465129203Scognet if ( 0 <= shiftCount ) { 2466129203Scognet if ( 0x43E <= aExp ) { 2467129203Scognet if ( a != LIT64( 0xC3E0000000000000 ) ) { 2468129203Scognet float_raise( float_flag_invalid ); 2469129203Scognet if ( ! aSign 2470129203Scognet || ( ( aExp == 0x7FF ) 2471129203Scognet && ( aSig != LIT64( 0x0010000000000000 ) ) ) 2472129203Scognet ) { 2473129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 2474129203Scognet } 2475129203Scognet } 2476129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 2477129203Scognet } 2478129203Scognet z = aSig<<shiftCount; 2479129203Scognet } 2480129203Scognet else { 2481129203Scognet if ( aExp < 0x3FE ) { 2482129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 2483129203Scognet return 0; 2484129203Scognet } 2485129203Scognet z = aSig>>( - shiftCount ); 2486129203Scognet if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { 2487129203Scognet float_exception_flags |= float_flag_inexact; 2488129203Scognet } 2489129203Scognet } 2490129203Scognet if ( aSign ) z = - z; 2491129203Scognet return z; 2492129203Scognet 2493129203Scognet} 2494129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2495129203Scognet 2496129203Scognet/* 2497129203Scognet------------------------------------------------------------------------------- 2498129203ScognetReturns the result of converting the double-precision floating-point value 2499129203Scognet`a' to the single-precision floating-point format. The conversion is 2500129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2501129203ScognetArithmetic. 2502129203Scognet------------------------------------------------------------------------------- 2503129203Scognet*/ 2504129203Scognetfloat32 float64_to_float32( float64 a ) 2505129203Scognet{ 2506129203Scognet flag aSign; 2507129203Scognet int16 aExp; 2508129203Scognet bits64 aSig; 2509129203Scognet bits32 zSig; 2510129203Scognet 2511129203Scognet aSig = extractFloat64Frac( a ); 2512129203Scognet aExp = extractFloat64Exp( a ); 2513129203Scognet aSign = extractFloat64Sign( a ); 2514129203Scognet if ( aExp == 0x7FF ) { 2515129203Scognet if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); 2516129203Scognet return packFloat32( aSign, 0xFF, 0 ); 2517129203Scognet } 2518129203Scognet shift64RightJamming( aSig, 22, &aSig ); 2519129203Scognet zSig = aSig; 2520129203Scognet if ( aExp || zSig ) { 2521129203Scognet zSig |= 0x40000000; 2522129203Scognet aExp -= 0x381; 2523129203Scognet } 2524129203Scognet return roundAndPackFloat32( aSign, aExp, zSig ); 2525129203Scognet 2526129203Scognet} 2527129203Scognet 2528129203Scognet#ifdef FLOATX80 2529129203Scognet 2530129203Scognet/* 2531129203Scognet------------------------------------------------------------------------------- 2532129203ScognetReturns the result of converting the double-precision floating-point value 2533129203Scognet`a' to the extended double-precision floating-point format. The conversion 2534129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 2535129203ScognetArithmetic. 2536129203Scognet------------------------------------------------------------------------------- 2537129203Scognet*/ 2538129203Scognetfloatx80 float64_to_floatx80( float64 a ) 2539129203Scognet{ 2540129203Scognet flag aSign; 2541129203Scognet int16 aExp; 2542129203Scognet bits64 aSig; 2543129203Scognet 2544129203Scognet aSig = extractFloat64Frac( a ); 2545129203Scognet aExp = extractFloat64Exp( a ); 2546129203Scognet aSign = extractFloat64Sign( a ); 2547129203Scognet if ( aExp == 0x7FF ) { 2548129203Scognet if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); 2549129203Scognet return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2550129203Scognet } 2551129203Scognet if ( aExp == 0 ) { 2552129203Scognet if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 2553129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2554129203Scognet } 2555129203Scognet return 2556129203Scognet packFloatx80( 2557129203Scognet aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); 2558129203Scognet 2559129203Scognet} 2560129203Scognet 2561129203Scognet#endif 2562129203Scognet 2563129203Scognet#ifdef FLOAT128 2564129203Scognet 2565129203Scognet/* 2566129203Scognet------------------------------------------------------------------------------- 2567129203ScognetReturns the result of converting the double-precision floating-point value 2568129203Scognet`a' to the quadruple-precision floating-point format. The conversion is 2569129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2570129203ScognetArithmetic. 2571129203Scognet------------------------------------------------------------------------------- 2572129203Scognet*/ 2573129203Scognetfloat128 float64_to_float128( float64 a ) 2574129203Scognet{ 2575129203Scognet flag aSign; 2576129203Scognet int16 aExp; 2577129203Scognet bits64 aSig, zSig0, zSig1; 2578129203Scognet 2579129203Scognet aSig = extractFloat64Frac( a ); 2580129203Scognet aExp = extractFloat64Exp( a ); 2581129203Scognet aSign = extractFloat64Sign( a ); 2582129203Scognet if ( aExp == 0x7FF ) { 2583129203Scognet if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); 2584129203Scognet return packFloat128( aSign, 0x7FFF, 0, 0 ); 2585129203Scognet } 2586129203Scognet if ( aExp == 0 ) { 2587129203Scognet if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); 2588129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2589129203Scognet --aExp; 2590129203Scognet } 2591129203Scognet shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); 2592129203Scognet return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); 2593129203Scognet 2594129203Scognet} 2595129203Scognet 2596129203Scognet#endif 2597129203Scognet 2598129203Scognet#ifndef SOFTFLOAT_FOR_GCC 2599129203Scognet/* 2600129203Scognet------------------------------------------------------------------------------- 2601129203ScognetRounds the double-precision floating-point value `a' to an integer, and 2602129203Scognetreturns the result as a double-precision floating-point value. The 2603129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary 2604129203ScognetFloating-Point Arithmetic. 2605129203Scognet------------------------------------------------------------------------------- 2606129203Scognet*/ 2607129203Scognetfloat64 float64_round_to_int( float64 a ) 2608129203Scognet{ 2609129203Scognet flag aSign; 2610129203Scognet int16 aExp; 2611129203Scognet bits64 lastBitMask, roundBitsMask; 2612129203Scognet int8 roundingMode; 2613129203Scognet float64 z; 2614129203Scognet 2615129203Scognet aExp = extractFloat64Exp( a ); 2616129203Scognet if ( 0x433 <= aExp ) { 2617129203Scognet if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { 2618129203Scognet return propagateFloat64NaN( a, a ); 2619129203Scognet } 2620129203Scognet return a; 2621129203Scognet } 2622129203Scognet if ( aExp < 0x3FF ) { 2623129203Scognet if ( (bits64) ( a<<1 ) == 0 ) return a; 2624129203Scognet float_exception_flags |= float_flag_inexact; 2625129203Scognet aSign = extractFloat64Sign( a ); 2626129203Scognet switch ( float_rounding_mode ) { 2627129203Scognet case float_round_nearest_even: 2628129203Scognet if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { 2629129203Scognet return packFloat64( aSign, 0x3FF, 0 ); 2630129203Scognet } 2631129203Scognet break; 2632129203Scognet case float_round_to_zero: 2633129203Scognet break; 2634129203Scognet case float_round_down: 2635129203Scognet return aSign ? LIT64( 0xBFF0000000000000 ) : 0; 2636129203Scognet case float_round_up: 2637129203Scognet return 2638129203Scognet aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); 2639129203Scognet } 2640129203Scognet return packFloat64( aSign, 0, 0 ); 2641129203Scognet } 2642129203Scognet lastBitMask = 1; 2643129203Scognet lastBitMask <<= 0x433 - aExp; 2644129203Scognet roundBitsMask = lastBitMask - 1; 2645129203Scognet z = a; 2646129203Scognet roundingMode = float_rounding_mode; 2647129203Scognet if ( roundingMode == float_round_nearest_even ) { 2648129203Scognet z += lastBitMask>>1; 2649129203Scognet if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 2650129203Scognet } 2651129203Scognet else if ( roundingMode != float_round_to_zero ) { 2652129203Scognet if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { 2653129203Scognet z += roundBitsMask; 2654129203Scognet } 2655129203Scognet } 2656129203Scognet z &= ~ roundBitsMask; 2657129203Scognet if ( z != a ) float_exception_flags |= float_flag_inexact; 2658129203Scognet return z; 2659129203Scognet 2660129203Scognet} 2661129203Scognet#endif 2662129203Scognet 2663129203Scognet/* 2664129203Scognet------------------------------------------------------------------------------- 2665129203ScognetReturns the result of adding the absolute values of the double-precision 2666129203Scognetfloating-point values `a' and `b'. If `zSign' is 1, the sum is negated 2667129203Scognetbefore being returned. `zSign' is ignored if the result is a NaN. 2668129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 2669129203ScognetFloating-Point Arithmetic. 2670129203Scognet------------------------------------------------------------------------------- 2671129203Scognet*/ 2672129203Scognetstatic float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 2673129203Scognet{ 2674129203Scognet int16 aExp, bExp, zExp; 2675129203Scognet bits64 aSig, bSig, zSig; 2676129203Scognet int16 expDiff; 2677129203Scognet 2678129203Scognet aSig = extractFloat64Frac( a ); 2679129203Scognet aExp = extractFloat64Exp( a ); 2680129203Scognet bSig = extractFloat64Frac( b ); 2681129203Scognet bExp = extractFloat64Exp( b ); 2682129203Scognet expDiff = aExp - bExp; 2683129203Scognet aSig <<= 9; 2684129203Scognet bSig <<= 9; 2685129203Scognet if ( 0 < expDiff ) { 2686129203Scognet if ( aExp == 0x7FF ) { 2687129203Scognet if ( aSig ) return propagateFloat64NaN( a, b ); 2688129203Scognet return a; 2689129203Scognet } 2690129203Scognet if ( bExp == 0 ) { 2691129203Scognet --expDiff; 2692129203Scognet } 2693129203Scognet else { 2694129203Scognet bSig |= LIT64( 0x2000000000000000 ); 2695129203Scognet } 2696129203Scognet shift64RightJamming( bSig, expDiff, &bSig ); 2697129203Scognet zExp = aExp; 2698129203Scognet } 2699129203Scognet else if ( expDiff < 0 ) { 2700129203Scognet if ( bExp == 0x7FF ) { 2701129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2702129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2703129203Scognet } 2704129203Scognet if ( aExp == 0 ) { 2705129203Scognet ++expDiff; 2706129203Scognet } 2707129203Scognet else { 2708129203Scognet aSig |= LIT64( 0x2000000000000000 ); 2709129203Scognet } 2710129203Scognet shift64RightJamming( aSig, - expDiff, &aSig ); 2711129203Scognet zExp = bExp; 2712129203Scognet } 2713129203Scognet else { 2714129203Scognet if ( aExp == 0x7FF ) { 2715129203Scognet if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 2716129203Scognet return a; 2717129203Scognet } 2718129203Scognet if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); 2719129203Scognet zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; 2720129203Scognet zExp = aExp; 2721129203Scognet goto roundAndPack; 2722129203Scognet } 2723129203Scognet aSig |= LIT64( 0x2000000000000000 ); 2724129203Scognet zSig = ( aSig + bSig )<<1; 2725129203Scognet --zExp; 2726129203Scognet if ( (sbits64) zSig < 0 ) { 2727129203Scognet zSig = aSig + bSig; 2728129203Scognet ++zExp; 2729129203Scognet } 2730129203Scognet roundAndPack: 2731129203Scognet return roundAndPackFloat64( zSign, zExp, zSig ); 2732129203Scognet 2733129203Scognet} 2734129203Scognet 2735129203Scognet/* 2736129203Scognet------------------------------------------------------------------------------- 2737129203ScognetReturns the result of subtracting the absolute values of the double- 2738129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the 2739129203Scognetdifference is negated before being returned. `zSign' is ignored if the 2740129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 2741129203ScognetStandard for Binary Floating-Point Arithmetic. 2742129203Scognet------------------------------------------------------------------------------- 2743129203Scognet*/ 2744129203Scognetstatic float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 2745129203Scognet{ 2746129203Scognet int16 aExp, bExp, zExp; 2747129203Scognet bits64 aSig, bSig, zSig; 2748129203Scognet int16 expDiff; 2749129203Scognet 2750129203Scognet aSig = extractFloat64Frac( a ); 2751129203Scognet aExp = extractFloat64Exp( a ); 2752129203Scognet bSig = extractFloat64Frac( b ); 2753129203Scognet bExp = extractFloat64Exp( b ); 2754129203Scognet expDiff = aExp - bExp; 2755129203Scognet aSig <<= 10; 2756129203Scognet bSig <<= 10; 2757129203Scognet if ( 0 < expDiff ) goto aExpBigger; 2758129203Scognet if ( expDiff < 0 ) goto bExpBigger; 2759129203Scognet if ( aExp == 0x7FF ) { 2760129203Scognet if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 2761129203Scognet float_raise( float_flag_invalid ); 2762129203Scognet return float64_default_nan; 2763129203Scognet } 2764129203Scognet if ( aExp == 0 ) { 2765129203Scognet aExp = 1; 2766129203Scognet bExp = 1; 2767129203Scognet } 2768129203Scognet if ( bSig < aSig ) goto aBigger; 2769129203Scognet if ( aSig < bSig ) goto bBigger; 2770129203Scognet return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); 2771129203Scognet bExpBigger: 2772129203Scognet if ( bExp == 0x7FF ) { 2773129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2774129203Scognet return packFloat64( zSign ^ 1, 0x7FF, 0 ); 2775129203Scognet } 2776129203Scognet if ( aExp == 0 ) { 2777129203Scognet ++expDiff; 2778129203Scognet } 2779129203Scognet else { 2780129203Scognet aSig |= LIT64( 0x4000000000000000 ); 2781129203Scognet } 2782129203Scognet shift64RightJamming( aSig, - expDiff, &aSig ); 2783129203Scognet bSig |= LIT64( 0x4000000000000000 ); 2784129203Scognet bBigger: 2785129203Scognet zSig = bSig - aSig; 2786129203Scognet zExp = bExp; 2787129203Scognet zSign ^= 1; 2788129203Scognet goto normalizeRoundAndPack; 2789129203Scognet aExpBigger: 2790129203Scognet if ( aExp == 0x7FF ) { 2791129203Scognet if ( aSig ) return propagateFloat64NaN( a, b ); 2792129203Scognet return a; 2793129203Scognet } 2794129203Scognet if ( bExp == 0 ) { 2795129203Scognet --expDiff; 2796129203Scognet } 2797129203Scognet else { 2798129203Scognet bSig |= LIT64( 0x4000000000000000 ); 2799129203Scognet } 2800129203Scognet shift64RightJamming( bSig, expDiff, &bSig ); 2801129203Scognet aSig |= LIT64( 0x4000000000000000 ); 2802129203Scognet aBigger: 2803129203Scognet zSig = aSig - bSig; 2804129203Scognet zExp = aExp; 2805129203Scognet normalizeRoundAndPack: 2806129203Scognet --zExp; 2807129203Scognet return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); 2808129203Scognet 2809129203Scognet} 2810129203Scognet 2811129203Scognet/* 2812129203Scognet------------------------------------------------------------------------------- 2813129203ScognetReturns the result of adding the double-precision floating-point values `a' 2814129203Scognetand `b'. The operation is performed according to the IEC/IEEE Standard for 2815129203ScognetBinary Floating-Point Arithmetic. 2816129203Scognet------------------------------------------------------------------------------- 2817129203Scognet*/ 2818129203Scognetfloat64 float64_add( float64 a, float64 b ) 2819129203Scognet{ 2820129203Scognet flag aSign, bSign; 2821129203Scognet 2822129203Scognet aSign = extractFloat64Sign( a ); 2823129203Scognet bSign = extractFloat64Sign( b ); 2824129203Scognet if ( aSign == bSign ) { 2825129203Scognet return addFloat64Sigs( a, b, aSign ); 2826129203Scognet } 2827129203Scognet else { 2828129203Scognet return subFloat64Sigs( a, b, aSign ); 2829129203Scognet } 2830129203Scognet 2831129203Scognet} 2832129203Scognet 2833129203Scognet/* 2834129203Scognet------------------------------------------------------------------------------- 2835129203ScognetReturns the result of subtracting the double-precision floating-point values 2836129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 2837129203Scognetfor Binary Floating-Point Arithmetic. 2838129203Scognet------------------------------------------------------------------------------- 2839129203Scognet*/ 2840129203Scognetfloat64 float64_sub( float64 a, float64 b ) 2841129203Scognet{ 2842129203Scognet flag aSign, bSign; 2843129203Scognet 2844129203Scognet aSign = extractFloat64Sign( a ); 2845129203Scognet bSign = extractFloat64Sign( b ); 2846129203Scognet if ( aSign == bSign ) { 2847129203Scognet return subFloat64Sigs( a, b, aSign ); 2848129203Scognet } 2849129203Scognet else { 2850129203Scognet return addFloat64Sigs( a, b, aSign ); 2851129203Scognet } 2852129203Scognet 2853129203Scognet} 2854129203Scognet 2855129203Scognet/* 2856129203Scognet------------------------------------------------------------------------------- 2857129203ScognetReturns the result of multiplying the double-precision floating-point values 2858129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 2859129203Scognetfor Binary Floating-Point Arithmetic. 2860129203Scognet------------------------------------------------------------------------------- 2861129203Scognet*/ 2862129203Scognetfloat64 float64_mul( float64 a, float64 b ) 2863129203Scognet{ 2864129203Scognet flag aSign, bSign, zSign; 2865129203Scognet int16 aExp, bExp, zExp; 2866129203Scognet bits64 aSig, bSig, zSig0, zSig1; 2867129203Scognet 2868129203Scognet aSig = extractFloat64Frac( a ); 2869129203Scognet aExp = extractFloat64Exp( a ); 2870129203Scognet aSign = extractFloat64Sign( a ); 2871129203Scognet bSig = extractFloat64Frac( b ); 2872129203Scognet bExp = extractFloat64Exp( b ); 2873129203Scognet bSign = extractFloat64Sign( b ); 2874129203Scognet zSign = aSign ^ bSign; 2875129203Scognet if ( aExp == 0x7FF ) { 2876129203Scognet if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 2877129203Scognet return propagateFloat64NaN( a, b ); 2878129203Scognet } 2879129203Scognet if ( ( bExp | bSig ) == 0 ) { 2880129203Scognet float_raise( float_flag_invalid ); 2881129203Scognet return float64_default_nan; 2882129203Scognet } 2883129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2884129203Scognet } 2885129203Scognet if ( bExp == 0x7FF ) { 2886129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2887129203Scognet if ( ( aExp | aSig ) == 0 ) { 2888129203Scognet float_raise( float_flag_invalid ); 2889129203Scognet return float64_default_nan; 2890129203Scognet } 2891129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2892129203Scognet } 2893129203Scognet if ( aExp == 0 ) { 2894129203Scognet if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 2895129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2896129203Scognet } 2897129203Scognet if ( bExp == 0 ) { 2898129203Scognet if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); 2899129203Scognet normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2900129203Scognet } 2901129203Scognet zExp = aExp + bExp - 0x3FF; 2902129203Scognet aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 2903129203Scognet bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2904129203Scognet mul64To128( aSig, bSig, &zSig0, &zSig1 ); 2905129203Scognet zSig0 |= ( zSig1 != 0 ); 2906129203Scognet if ( 0 <= (sbits64) ( zSig0<<1 ) ) { 2907129203Scognet zSig0 <<= 1; 2908129203Scognet --zExp; 2909129203Scognet } 2910129203Scognet return roundAndPackFloat64( zSign, zExp, zSig0 ); 2911129203Scognet 2912129203Scognet} 2913129203Scognet 2914129203Scognet/* 2915129203Scognet------------------------------------------------------------------------------- 2916129203ScognetReturns the result of dividing the double-precision floating-point value `a' 2917129203Scognetby the corresponding value `b'. The operation is performed according to 2918129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2919129203Scognet------------------------------------------------------------------------------- 2920129203Scognet*/ 2921129203Scognetfloat64 float64_div( float64 a, float64 b ) 2922129203Scognet{ 2923129203Scognet flag aSign, bSign, zSign; 2924129203Scognet int16 aExp, bExp, zExp; 2925129203Scognet bits64 aSig, bSig, zSig; 2926129203Scognet bits64 rem0, rem1; 2927129203Scognet bits64 term0, term1; 2928129203Scognet 2929129203Scognet aSig = extractFloat64Frac( a ); 2930129203Scognet aExp = extractFloat64Exp( a ); 2931129203Scognet aSign = extractFloat64Sign( a ); 2932129203Scognet bSig = extractFloat64Frac( b ); 2933129203Scognet bExp = extractFloat64Exp( b ); 2934129203Scognet bSign = extractFloat64Sign( b ); 2935129203Scognet zSign = aSign ^ bSign; 2936129203Scognet if ( aExp == 0x7FF ) { 2937129203Scognet if ( aSig ) return propagateFloat64NaN( a, b ); 2938129203Scognet if ( bExp == 0x7FF ) { 2939129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2940129203Scognet float_raise( float_flag_invalid ); 2941129203Scognet return float64_default_nan; 2942129203Scognet } 2943129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2944129203Scognet } 2945129203Scognet if ( bExp == 0x7FF ) { 2946129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2947129203Scognet return packFloat64( zSign, 0, 0 ); 2948129203Scognet } 2949129203Scognet if ( bExp == 0 ) { 2950129203Scognet if ( bSig == 0 ) { 2951129203Scognet if ( ( aExp | aSig ) == 0 ) { 2952129203Scognet float_raise( float_flag_invalid ); 2953129203Scognet return float64_default_nan; 2954129203Scognet } 2955129203Scognet float_raise( float_flag_divbyzero ); 2956129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2957129203Scognet } 2958129203Scognet normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2959129203Scognet } 2960129203Scognet if ( aExp == 0 ) { 2961129203Scognet if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 2962129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2963129203Scognet } 2964129203Scognet zExp = aExp - bExp + 0x3FD; 2965129203Scognet aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 2966129203Scognet bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2967129203Scognet if ( bSig <= ( aSig + aSig ) ) { 2968129203Scognet aSig >>= 1; 2969129203Scognet ++zExp; 2970129203Scognet } 2971129203Scognet zSig = estimateDiv128To64( aSig, 0, bSig ); 2972129203Scognet if ( ( zSig & 0x1FF ) <= 2 ) { 2973129203Scognet mul64To128( bSig, zSig, &term0, &term1 ); 2974129203Scognet sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 2975129203Scognet while ( (sbits64) rem0 < 0 ) { 2976129203Scognet --zSig; 2977129203Scognet add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 2978129203Scognet } 2979129203Scognet zSig |= ( rem1 != 0 ); 2980129203Scognet } 2981129203Scognet return roundAndPackFloat64( zSign, zExp, zSig ); 2982129203Scognet 2983129203Scognet} 2984129203Scognet 2985129203Scognet#ifndef SOFTFLOAT_FOR_GCC 2986129203Scognet/* 2987129203Scognet------------------------------------------------------------------------------- 2988129203ScognetReturns the remainder of the double-precision floating-point value `a' 2989129203Scognetwith respect to the corresponding value `b'. The operation is performed 2990129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2991129203Scognet------------------------------------------------------------------------------- 2992129203Scognet*/ 2993129203Scognetfloat64 float64_rem( float64 a, float64 b ) 2994129203Scognet{ 2995129203Scognet flag aSign, bSign, zSign; 2996129203Scognet int16 aExp, bExp, expDiff; 2997129203Scognet bits64 aSig, bSig; 2998129203Scognet bits64 q, alternateASig; 2999129203Scognet sbits64 sigMean; 3000129203Scognet 3001129203Scognet aSig = extractFloat64Frac( a ); 3002129203Scognet aExp = extractFloat64Exp( a ); 3003129203Scognet aSign = extractFloat64Sign( a ); 3004129203Scognet bSig = extractFloat64Frac( b ); 3005129203Scognet bExp = extractFloat64Exp( b ); 3006129203Scognet bSign = extractFloat64Sign( b ); 3007129203Scognet if ( aExp == 0x7FF ) { 3008129203Scognet if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 3009129203Scognet return propagateFloat64NaN( a, b ); 3010129203Scognet } 3011129203Scognet float_raise( float_flag_invalid ); 3012129203Scognet return float64_default_nan; 3013129203Scognet } 3014129203Scognet if ( bExp == 0x7FF ) { 3015129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 3016129203Scognet return a; 3017129203Scognet } 3018129203Scognet if ( bExp == 0 ) { 3019129203Scognet if ( bSig == 0 ) { 3020129203Scognet float_raise( float_flag_invalid ); 3021129203Scognet return float64_default_nan; 3022129203Scognet } 3023129203Scognet normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 3024129203Scognet } 3025129203Scognet if ( aExp == 0 ) { 3026129203Scognet if ( aSig == 0 ) return a; 3027129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3028129203Scognet } 3029129203Scognet expDiff = aExp - bExp; 3030129203Scognet aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; 3031129203Scognet bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 3032129203Scognet if ( expDiff < 0 ) { 3033129203Scognet if ( expDiff < -1 ) return a; 3034129203Scognet aSig >>= 1; 3035129203Scognet } 3036129203Scognet q = ( bSig <= aSig ); 3037129203Scognet if ( q ) aSig -= bSig; 3038129203Scognet expDiff -= 64; 3039129203Scognet while ( 0 < expDiff ) { 3040129203Scognet q = estimateDiv128To64( aSig, 0, bSig ); 3041129203Scognet q = ( 2 < q ) ? q - 2 : 0; 3042129203Scognet aSig = - ( ( bSig>>2 ) * q ); 3043129203Scognet expDiff -= 62; 3044129203Scognet } 3045129203Scognet expDiff += 64; 3046129203Scognet if ( 0 < expDiff ) { 3047129203Scognet q = estimateDiv128To64( aSig, 0, bSig ); 3048129203Scognet q = ( 2 < q ) ? q - 2 : 0; 3049129203Scognet q >>= 64 - expDiff; 3050129203Scognet bSig >>= 2; 3051129203Scognet aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 3052129203Scognet } 3053129203Scognet else { 3054129203Scognet aSig >>= 2; 3055129203Scognet bSig >>= 2; 3056129203Scognet } 3057129203Scognet do { 3058129203Scognet alternateASig = aSig; 3059129203Scognet ++q; 3060129203Scognet aSig -= bSig; 3061129203Scognet } while ( 0 <= (sbits64) aSig ); 3062129203Scognet sigMean = aSig + alternateASig; 3063129203Scognet if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 3064129203Scognet aSig = alternateASig; 3065129203Scognet } 3066129203Scognet zSign = ( (sbits64) aSig < 0 ); 3067129203Scognet if ( zSign ) aSig = - aSig; 3068129203Scognet return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); 3069129203Scognet 3070129203Scognet} 3071129203Scognet 3072129203Scognet/* 3073129203Scognet------------------------------------------------------------------------------- 3074129203ScognetReturns the square root of the double-precision floating-point value `a'. 3075129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary 3076129203ScognetFloating-Point Arithmetic. 3077129203Scognet------------------------------------------------------------------------------- 3078129203Scognet*/ 3079129203Scognetfloat64 float64_sqrt( float64 a ) 3080129203Scognet{ 3081129203Scognet flag aSign; 3082129203Scognet int16 aExp, zExp; 3083129203Scognet bits64 aSig, zSig, doubleZSig; 3084129203Scognet bits64 rem0, rem1, term0, term1; 3085129203Scognet 3086129203Scognet aSig = extractFloat64Frac( a ); 3087129203Scognet aExp = extractFloat64Exp( a ); 3088129203Scognet aSign = extractFloat64Sign( a ); 3089129203Scognet if ( aExp == 0x7FF ) { 3090129203Scognet if ( aSig ) return propagateFloat64NaN( a, a ); 3091129203Scognet if ( ! aSign ) return a; 3092129203Scognet float_raise( float_flag_invalid ); 3093129203Scognet return float64_default_nan; 3094129203Scognet } 3095129203Scognet if ( aSign ) { 3096129203Scognet if ( ( aExp | aSig ) == 0 ) return a; 3097129203Scognet float_raise( float_flag_invalid ); 3098129203Scognet return float64_default_nan; 3099129203Scognet } 3100129203Scognet if ( aExp == 0 ) { 3101129203Scognet if ( aSig == 0 ) return 0; 3102129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3103129203Scognet } 3104129203Scognet zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 3105129203Scognet aSig |= LIT64( 0x0010000000000000 ); 3106129203Scognet zSig = estimateSqrt32( aExp, aSig>>21 ); 3107129203Scognet aSig <<= 9 - ( aExp & 1 ); 3108129203Scognet zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); 3109129203Scognet if ( ( zSig & 0x1FF ) <= 5 ) { 3110129203Scognet doubleZSig = zSig<<1; 3111129203Scognet mul64To128( zSig, zSig, &term0, &term1 ); 3112129203Scognet sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 3113129203Scognet while ( (sbits64) rem0 < 0 ) { 3114129203Scognet --zSig; 3115129203Scognet doubleZSig -= 2; 3116129203Scognet add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); 3117129203Scognet } 3118129203Scognet zSig |= ( ( rem0 | rem1 ) != 0 ); 3119129203Scognet } 3120129203Scognet return roundAndPackFloat64( 0, zExp, zSig ); 3121129203Scognet 3122129203Scognet} 3123129203Scognet#endif 3124129203Scognet 3125129203Scognet/* 3126129203Scognet------------------------------------------------------------------------------- 3127129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the 3128129203Scognetcorresponding value `b', and 0 otherwise. The comparison is performed 3129129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3130129203Scognet------------------------------------------------------------------------------- 3131129203Scognet*/ 3132129203Scognetflag float64_eq( float64 a, float64 b ) 3133129203Scognet{ 3134129203Scognet 3135129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3136129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3137129203Scognet ) { 3138129203Scognet if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3139129203Scognet float_raise( float_flag_invalid ); 3140129203Scognet } 3141129203Scognet return 0; 3142129203Scognet } 3143129203Scognet return ( a == b ) || 3144129203Scognet ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 3145129203Scognet 3146129203Scognet} 3147129203Scognet 3148129203Scognet/* 3149129203Scognet------------------------------------------------------------------------------- 3150129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or 3151129203Scognetequal to the corresponding value `b', and 0 otherwise. The comparison is 3152129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 3153129203ScognetArithmetic. 3154129203Scognet------------------------------------------------------------------------------- 3155129203Scognet*/ 3156129203Scognetflag float64_le( float64 a, float64 b ) 3157129203Scognet{ 3158129203Scognet flag aSign, bSign; 3159129203Scognet 3160129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3161129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3162129203Scognet ) { 3163129203Scognet float_raise( float_flag_invalid ); 3164129203Scognet return 0; 3165129203Scognet } 3166129203Scognet aSign = extractFloat64Sign( a ); 3167129203Scognet bSign = extractFloat64Sign( b ); 3168129203Scognet if ( aSign != bSign ) 3169129203Scognet return aSign || 3170129203Scognet ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 3171129203Scognet 0 ); 3172129203Scognet return ( a == b ) || 3173129203Scognet ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3174129203Scognet 3175129203Scognet} 3176129203Scognet 3177129203Scognet/* 3178129203Scognet------------------------------------------------------------------------------- 3179129203ScognetReturns 1 if the double-precision floating-point value `a' is less than 3180129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 3181129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3182129203Scognet------------------------------------------------------------------------------- 3183129203Scognet*/ 3184129203Scognetflag float64_lt( float64 a, float64 b ) 3185129203Scognet{ 3186129203Scognet flag aSign, bSign; 3187129203Scognet 3188129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3189129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3190129203Scognet ) { 3191129203Scognet float_raise( float_flag_invalid ); 3192129203Scognet return 0; 3193129203Scognet } 3194129203Scognet aSign = extractFloat64Sign( a ); 3195129203Scognet bSign = extractFloat64Sign( b ); 3196129203Scognet if ( aSign != bSign ) 3197129203Scognet return aSign && 3198129203Scognet ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 3199129203Scognet 0 ); 3200129203Scognet return ( a != b ) && 3201129203Scognet ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3202129203Scognet 3203129203Scognet} 3204129203Scognet 3205129203Scognet#ifndef SOFTFLOAT_FOR_GCC 3206129203Scognet/* 3207129203Scognet------------------------------------------------------------------------------- 3208129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the 3209129203Scognetcorresponding value `b', and 0 otherwise. The invalid exception is raised 3210129203Scognetif either operand is a NaN. Otherwise, the comparison is performed 3211129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3212129203Scognet------------------------------------------------------------------------------- 3213129203Scognet*/ 3214129203Scognetflag float64_eq_signaling( float64 a, float64 b ) 3215129203Scognet{ 3216129203Scognet 3217129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3218129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3219129203Scognet ) { 3220129203Scognet float_raise( float_flag_invalid ); 3221129203Scognet return 0; 3222129203Scognet } 3223129203Scognet return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3224129203Scognet 3225129203Scognet} 3226129203Scognet 3227129203Scognet/* 3228129203Scognet------------------------------------------------------------------------------- 3229129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or 3230129203Scognetequal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 3231129203Scognetcause an exception. Otherwise, the comparison is performed according to the 3232129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 3233129203Scognet------------------------------------------------------------------------------- 3234129203Scognet*/ 3235129203Scognetflag float64_le_quiet( float64 a, float64 b ) 3236129203Scognet{ 3237129203Scognet flag aSign, bSign; 3238129203Scognet 3239129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3240129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3241129203Scognet ) { 3242129203Scognet if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3243129203Scognet float_raise( float_flag_invalid ); 3244129203Scognet } 3245129203Scognet return 0; 3246129203Scognet } 3247129203Scognet aSign = extractFloat64Sign( a ); 3248129203Scognet bSign = extractFloat64Sign( b ); 3249129203Scognet if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3250129203Scognet return ( a == b ) || ( aSign ^ ( a < b ) ); 3251129203Scognet 3252129203Scognet} 3253129203Scognet 3254129203Scognet/* 3255129203Scognet------------------------------------------------------------------------------- 3256129203ScognetReturns 1 if the double-precision floating-point value `a' is less than 3257129203Scognetthe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 3258129203Scognetexception. Otherwise, the comparison is performed according to the IEC/IEEE 3259129203ScognetStandard for Binary Floating-Point Arithmetic. 3260129203Scognet------------------------------------------------------------------------------- 3261129203Scognet*/ 3262129203Scognetflag float64_lt_quiet( float64 a, float64 b ) 3263129203Scognet{ 3264129203Scognet flag aSign, bSign; 3265129203Scognet 3266129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3267129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3268129203Scognet ) { 3269129203Scognet if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3270129203Scognet float_raise( float_flag_invalid ); 3271129203Scognet } 3272129203Scognet return 0; 3273129203Scognet } 3274129203Scognet aSign = extractFloat64Sign( a ); 3275129203Scognet bSign = extractFloat64Sign( b ); 3276129203Scognet if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 3277129203Scognet return ( a != b ) && ( aSign ^ ( a < b ) ); 3278129203Scognet 3279129203Scognet} 3280129203Scognet#endif 3281129203Scognet 3282129203Scognet#ifdef FLOATX80 3283129203Scognet 3284129203Scognet/* 3285129203Scognet------------------------------------------------------------------------------- 3286129203ScognetReturns the result of converting the extended double-precision floating- 3287129203Scognetpoint value `a' to the 32-bit two's complement integer format. The 3288129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3289129203ScognetFloating-Point Arithmetic---which means in particular that the conversion 3290129203Scognetis rounded according to the current rounding mode. If `a' is a NaN, the 3291129203Scognetlargest positive integer is returned. Otherwise, if the conversion 3292129203Scognetoverflows, the largest integer with the same sign as `a' is returned. 3293129203Scognet------------------------------------------------------------------------------- 3294129203Scognet*/ 3295129203Scognetint32 floatx80_to_int32( floatx80 a ) 3296129203Scognet{ 3297129203Scognet flag aSign; 3298129203Scognet int32 aExp, shiftCount; 3299129203Scognet bits64 aSig; 3300129203Scognet 3301129203Scognet aSig = extractFloatx80Frac( a ); 3302129203Scognet aExp = extractFloatx80Exp( a ); 3303129203Scognet aSign = extractFloatx80Sign( a ); 3304129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3305129203Scognet shiftCount = 0x4037 - aExp; 3306129203Scognet if ( shiftCount <= 0 ) shiftCount = 1; 3307129203Scognet shift64RightJamming( aSig, shiftCount, &aSig ); 3308129203Scognet return roundAndPackInt32( aSign, aSig ); 3309129203Scognet 3310129203Scognet} 3311129203Scognet 3312129203Scognet/* 3313129203Scognet------------------------------------------------------------------------------- 3314129203ScognetReturns the result of converting the extended double-precision floating- 3315129203Scognetpoint value `a' to the 32-bit two's complement integer format. The 3316129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3317129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded 3318129203Scognettoward zero. If `a' is a NaN, the largest positive integer is returned. 3319129203ScognetOtherwise, if the conversion overflows, the largest integer with the same 3320129203Scognetsign as `a' is returned. 3321129203Scognet------------------------------------------------------------------------------- 3322129203Scognet*/ 3323129203Scognetint32 floatx80_to_int32_round_to_zero( floatx80 a ) 3324129203Scognet{ 3325129203Scognet flag aSign; 3326129203Scognet int32 aExp, shiftCount; 3327129203Scognet bits64 aSig, savedASig; 3328129203Scognet int32 z; 3329129203Scognet 3330129203Scognet aSig = extractFloatx80Frac( a ); 3331129203Scognet aExp = extractFloatx80Exp( a ); 3332129203Scognet aSign = extractFloatx80Sign( a ); 3333129203Scognet if ( 0x401E < aExp ) { 3334129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3335129203Scognet goto invalid; 3336129203Scognet } 3337129203Scognet else if ( aExp < 0x3FFF ) { 3338129203Scognet if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 3339129203Scognet return 0; 3340129203Scognet } 3341129203Scognet shiftCount = 0x403E - aExp; 3342129203Scognet savedASig = aSig; 3343129203Scognet aSig >>= shiftCount; 3344129203Scognet z = aSig; 3345129203Scognet if ( aSign ) z = - z; 3346129203Scognet if ( ( z < 0 ) ^ aSign ) { 3347129203Scognet invalid: 3348129203Scognet float_raise( float_flag_invalid ); 3349129203Scognet return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 3350129203Scognet } 3351129203Scognet if ( ( aSig<<shiftCount ) != savedASig ) { 3352129203Scognet float_exception_flags |= float_flag_inexact; 3353129203Scognet } 3354129203Scognet return z; 3355129203Scognet 3356129203Scognet} 3357129203Scognet 3358129203Scognet/* 3359129203Scognet------------------------------------------------------------------------------- 3360129203ScognetReturns the result of converting the extended double-precision floating- 3361129203Scognetpoint value `a' to the 64-bit two's complement integer format. The 3362129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3363129203ScognetFloating-Point Arithmetic---which means in particular that the conversion 3364129203Scognetis rounded according to the current rounding mode. If `a' is a NaN, 3365129203Scognetthe largest positive integer is returned. Otherwise, if the conversion 3366129203Scognetoverflows, the largest integer with the same sign as `a' is returned. 3367129203Scognet------------------------------------------------------------------------------- 3368129203Scognet*/ 3369129203Scognetint64 floatx80_to_int64( floatx80 a ) 3370129203Scognet{ 3371129203Scognet flag aSign; 3372129203Scognet int32 aExp, shiftCount; 3373129203Scognet bits64 aSig, aSigExtra; 3374129203Scognet 3375129203Scognet aSig = extractFloatx80Frac( a ); 3376129203Scognet aExp = extractFloatx80Exp( a ); 3377129203Scognet aSign = extractFloatx80Sign( a ); 3378129203Scognet shiftCount = 0x403E - aExp; 3379129203Scognet if ( shiftCount <= 0 ) { 3380129203Scognet if ( shiftCount ) { 3381129203Scognet float_raise( float_flag_invalid ); 3382129203Scognet if ( ! aSign 3383129203Scognet || ( ( aExp == 0x7FFF ) 3384129203Scognet && ( aSig != LIT64( 0x8000000000000000 ) ) ) 3385129203Scognet ) { 3386129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 3387129203Scognet } 3388129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 3389129203Scognet } 3390129203Scognet aSigExtra = 0; 3391129203Scognet } 3392129203Scognet else { 3393129203Scognet shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 3394129203Scognet } 3395129203Scognet return roundAndPackInt64( aSign, aSig, aSigExtra ); 3396129203Scognet 3397129203Scognet} 3398129203Scognet 3399129203Scognet/* 3400129203Scognet------------------------------------------------------------------------------- 3401129203ScognetReturns the result of converting the extended double-precision floating- 3402129203Scognetpoint value `a' to the 64-bit two's complement integer format. The 3403129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3404129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded 3405129203Scognettoward zero. If `a' is a NaN, the largest positive integer is returned. 3406129203ScognetOtherwise, if the conversion overflows, the largest integer with the same 3407129203Scognetsign as `a' is returned. 3408129203Scognet------------------------------------------------------------------------------- 3409129203Scognet*/ 3410129203Scognetint64 floatx80_to_int64_round_to_zero( floatx80 a ) 3411129203Scognet{ 3412129203Scognet flag aSign; 3413129203Scognet int32 aExp, shiftCount; 3414129203Scognet bits64 aSig; 3415129203Scognet int64 z; 3416129203Scognet 3417129203Scognet aSig = extractFloatx80Frac( a ); 3418129203Scognet aExp = extractFloatx80Exp( a ); 3419129203Scognet aSign = extractFloatx80Sign( a ); 3420129203Scognet shiftCount = aExp - 0x403E; 3421129203Scognet if ( 0 <= shiftCount ) { 3422129203Scognet aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); 3423129203Scognet if ( ( a.high != 0xC03E ) || aSig ) { 3424129203Scognet float_raise( float_flag_invalid ); 3425129203Scognet if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { 3426129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 3427129203Scognet } 3428129203Scognet } 3429129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 3430129203Scognet } 3431129203Scognet else if ( aExp < 0x3FFF ) { 3432129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 3433129203Scognet return 0; 3434129203Scognet } 3435129203Scognet z = aSig>>( - shiftCount ); 3436129203Scognet if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { 3437129203Scognet float_exception_flags |= float_flag_inexact; 3438129203Scognet } 3439129203Scognet if ( aSign ) z = - z; 3440129203Scognet return z; 3441129203Scognet 3442129203Scognet} 3443129203Scognet 3444129203Scognet/* 3445129203Scognet------------------------------------------------------------------------------- 3446129203ScognetReturns the result of converting the extended double-precision floating- 3447129203Scognetpoint value `a' to the single-precision floating-point format. The 3448129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3449129203ScognetFloating-Point Arithmetic. 3450129203Scognet------------------------------------------------------------------------------- 3451129203Scognet*/ 3452129203Scognetfloat32 floatx80_to_float32( floatx80 a ) 3453129203Scognet{ 3454129203Scognet flag aSign; 3455129203Scognet int32 aExp; 3456129203Scognet bits64 aSig; 3457129203Scognet 3458129203Scognet aSig = extractFloatx80Frac( a ); 3459129203Scognet aExp = extractFloatx80Exp( a ); 3460129203Scognet aSign = extractFloatx80Sign( a ); 3461129203Scognet if ( aExp == 0x7FFF ) { 3462129203Scognet if ( (bits64) ( aSig<<1 ) ) { 3463129203Scognet return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); 3464129203Scognet } 3465129203Scognet return packFloat32( aSign, 0xFF, 0 ); 3466129203Scognet } 3467129203Scognet shift64RightJamming( aSig, 33, &aSig ); 3468129203Scognet if ( aExp || aSig ) aExp -= 0x3F81; 3469129203Scognet return roundAndPackFloat32( aSign, aExp, aSig ); 3470129203Scognet 3471129203Scognet} 3472129203Scognet 3473129203Scognet/* 3474129203Scognet------------------------------------------------------------------------------- 3475129203ScognetReturns the result of converting the extended double-precision floating- 3476129203Scognetpoint value `a' to the double-precision floating-point format. The 3477129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3478129203ScognetFloating-Point Arithmetic. 3479129203Scognet------------------------------------------------------------------------------- 3480129203Scognet*/ 3481129203Scognetfloat64 floatx80_to_float64( floatx80 a ) 3482129203Scognet{ 3483129203Scognet flag aSign; 3484129203Scognet int32 aExp; 3485129203Scognet bits64 aSig, zSig; 3486129203Scognet 3487129203Scognet aSig = extractFloatx80Frac( a ); 3488129203Scognet aExp = extractFloatx80Exp( a ); 3489129203Scognet aSign = extractFloatx80Sign( a ); 3490129203Scognet if ( aExp == 0x7FFF ) { 3491129203Scognet if ( (bits64) ( aSig<<1 ) ) { 3492129203Scognet return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); 3493129203Scognet } 3494129203Scognet return packFloat64( aSign, 0x7FF, 0 ); 3495129203Scognet } 3496129203Scognet shift64RightJamming( aSig, 1, &zSig ); 3497129203Scognet if ( aExp || aSig ) aExp -= 0x3C01; 3498129203Scognet return roundAndPackFloat64( aSign, aExp, zSig ); 3499129203Scognet 3500129203Scognet} 3501129203Scognet 3502129203Scognet#ifdef FLOAT128 3503129203Scognet 3504129203Scognet/* 3505129203Scognet------------------------------------------------------------------------------- 3506129203ScognetReturns the result of converting the extended double-precision floating- 3507129203Scognetpoint value `a' to the quadruple-precision floating-point format. The 3508129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3509129203ScognetFloating-Point Arithmetic. 3510129203Scognet------------------------------------------------------------------------------- 3511129203Scognet*/ 3512129203Scognetfloat128 floatx80_to_float128( floatx80 a ) 3513129203Scognet{ 3514129203Scognet flag aSign; 3515129203Scognet int16 aExp; 3516129203Scognet bits64 aSig, zSig0, zSig1; 3517129203Scognet 3518129203Scognet aSig = extractFloatx80Frac( a ); 3519129203Scognet aExp = extractFloatx80Exp( a ); 3520129203Scognet aSign = extractFloatx80Sign( a ); 3521129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { 3522129203Scognet return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); 3523129203Scognet } 3524129203Scognet shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); 3525129203Scognet return packFloat128( aSign, aExp, zSig0, zSig1 ); 3526129203Scognet 3527129203Scognet} 3528129203Scognet 3529129203Scognet#endif 3530129203Scognet 3531129203Scognet/* 3532129203Scognet------------------------------------------------------------------------------- 3533129203ScognetRounds the extended double-precision floating-point value `a' to an integer, 3534129203Scognetand returns the result as an extended quadruple-precision floating-point 3535129203Scognetvalue. The operation is performed according to the IEC/IEEE Standard for 3536129203ScognetBinary Floating-Point Arithmetic. 3537129203Scognet------------------------------------------------------------------------------- 3538129203Scognet*/ 3539129203Scognetfloatx80 floatx80_round_to_int( floatx80 a ) 3540129203Scognet{ 3541129203Scognet flag aSign; 3542129203Scognet int32 aExp; 3543129203Scognet bits64 lastBitMask, roundBitsMask; 3544129203Scognet int8 roundingMode; 3545129203Scognet floatx80 z; 3546129203Scognet 3547129203Scognet aExp = extractFloatx80Exp( a ); 3548129203Scognet if ( 0x403E <= aExp ) { 3549129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { 3550129203Scognet return propagateFloatx80NaN( a, a ); 3551129203Scognet } 3552129203Scognet return a; 3553129203Scognet } 3554129203Scognet if ( aExp < 0x3FFF ) { 3555129203Scognet if ( ( aExp == 0 ) 3556129203Scognet && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { 3557129203Scognet return a; 3558129203Scognet } 3559129203Scognet float_exception_flags |= float_flag_inexact; 3560129203Scognet aSign = extractFloatx80Sign( a ); 3561129203Scognet switch ( float_rounding_mode ) { 3562129203Scognet case float_round_nearest_even: 3563129203Scognet if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) 3564129203Scognet ) { 3565129203Scognet return 3566129203Scognet packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3567129203Scognet } 3568129203Scognet break; 3569129203Scognet case float_round_to_zero: 3570129203Scognet break; 3571129203Scognet case float_round_down: 3572129203Scognet return 3573129203Scognet aSign ? 3574129203Scognet packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) 3575129203Scognet : packFloatx80( 0, 0, 0 ); 3576129203Scognet case float_round_up: 3577129203Scognet return 3578129203Scognet aSign ? packFloatx80( 1, 0, 0 ) 3579129203Scognet : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3580129203Scognet } 3581129203Scognet return packFloatx80( aSign, 0, 0 ); 3582129203Scognet } 3583129203Scognet lastBitMask = 1; 3584129203Scognet lastBitMask <<= 0x403E - aExp; 3585129203Scognet roundBitsMask = lastBitMask - 1; 3586129203Scognet z = a; 3587129203Scognet roundingMode = float_rounding_mode; 3588129203Scognet if ( roundingMode == float_round_nearest_even ) { 3589129203Scognet z.low += lastBitMask>>1; 3590129203Scognet if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 3591129203Scognet } 3592129203Scognet else if ( roundingMode != float_round_to_zero ) { 3593129203Scognet if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { 3594129203Scognet z.low += roundBitsMask; 3595129203Scognet } 3596129203Scognet } 3597129203Scognet z.low &= ~ roundBitsMask; 3598129203Scognet if ( z.low == 0 ) { 3599129203Scognet ++z.high; 3600129203Scognet z.low = LIT64( 0x8000000000000000 ); 3601129203Scognet } 3602129203Scognet if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; 3603129203Scognet return z; 3604129203Scognet 3605129203Scognet} 3606129203Scognet 3607129203Scognet/* 3608129203Scognet------------------------------------------------------------------------------- 3609129203ScognetReturns the result of adding the absolute values of the extended double- 3610129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the sum is 3611129203Scognetnegated before being returned. `zSign' is ignored if the result is a NaN. 3612129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 3613129203ScognetFloating-Point Arithmetic. 3614129203Scognet------------------------------------------------------------------------------- 3615129203Scognet*/ 3616129203Scognetstatic floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3617129203Scognet{ 3618129203Scognet int32 aExp, bExp, zExp; 3619129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3620129203Scognet int32 expDiff; 3621129203Scognet 3622129203Scognet aSig = extractFloatx80Frac( a ); 3623129203Scognet aExp = extractFloatx80Exp( a ); 3624129203Scognet bSig = extractFloatx80Frac( b ); 3625129203Scognet bExp = extractFloatx80Exp( b ); 3626129203Scognet expDiff = aExp - bExp; 3627129203Scognet if ( 0 < expDiff ) { 3628129203Scognet if ( aExp == 0x7FFF ) { 3629129203Scognet if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3630129203Scognet return a; 3631129203Scognet } 3632129203Scognet if ( bExp == 0 ) --expDiff; 3633129203Scognet shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3634129203Scognet zExp = aExp; 3635129203Scognet } 3636129203Scognet else if ( expDiff < 0 ) { 3637129203Scognet if ( bExp == 0x7FFF ) { 3638129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3639129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3640129203Scognet } 3641129203Scognet if ( aExp == 0 ) ++expDiff; 3642129203Scognet shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3643129203Scognet zExp = bExp; 3644129203Scognet } 3645129203Scognet else { 3646129203Scognet if ( aExp == 0x7FFF ) { 3647129203Scognet if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3648129203Scognet return propagateFloatx80NaN( a, b ); 3649129203Scognet } 3650129203Scognet return a; 3651129203Scognet } 3652129203Scognet zSig1 = 0; 3653129203Scognet zSig0 = aSig + bSig; 3654129203Scognet if ( aExp == 0 ) { 3655129203Scognet normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); 3656129203Scognet goto roundAndPack; 3657129203Scognet } 3658129203Scognet zExp = aExp; 3659129203Scognet goto shiftRight1; 3660129203Scognet } 3661129203Scognet zSig0 = aSig + bSig; 3662129203Scognet if ( (sbits64) zSig0 < 0 ) goto roundAndPack; 3663129203Scognet shiftRight1: 3664129203Scognet shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3665129203Scognet zSig0 |= LIT64( 0x8000000000000000 ); 3666129203Scognet ++zExp; 3667129203Scognet roundAndPack: 3668129203Scognet return 3669129203Scognet roundAndPackFloatx80( 3670129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3671129203Scognet 3672129203Scognet} 3673129203Scognet 3674129203Scognet/* 3675129203Scognet------------------------------------------------------------------------------- 3676129203ScognetReturns the result of subtracting the absolute values of the extended 3677129203Scognetdouble-precision floating-point values `a' and `b'. If `zSign' is 1, the 3678129203Scognetdifference is negated before being returned. `zSign' is ignored if the 3679129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 3680129203ScognetStandard for Binary Floating-Point Arithmetic. 3681129203Scognet------------------------------------------------------------------------------- 3682129203Scognet*/ 3683129203Scognetstatic floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3684129203Scognet{ 3685129203Scognet int32 aExp, bExp, zExp; 3686129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3687129203Scognet int32 expDiff; 3688129203Scognet floatx80 z; 3689129203Scognet 3690129203Scognet aSig = extractFloatx80Frac( a ); 3691129203Scognet aExp = extractFloatx80Exp( a ); 3692129203Scognet bSig = extractFloatx80Frac( b ); 3693129203Scognet bExp = extractFloatx80Exp( b ); 3694129203Scognet expDiff = aExp - bExp; 3695129203Scognet if ( 0 < expDiff ) goto aExpBigger; 3696129203Scognet if ( expDiff < 0 ) goto bExpBigger; 3697129203Scognet if ( aExp == 0x7FFF ) { 3698129203Scognet if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3699129203Scognet return propagateFloatx80NaN( a, b ); 3700129203Scognet } 3701129203Scognet float_raise( float_flag_invalid ); 3702129203Scognet z.low = floatx80_default_nan_low; 3703129203Scognet z.high = floatx80_default_nan_high; 3704129203Scognet return z; 3705129203Scognet } 3706129203Scognet if ( aExp == 0 ) { 3707129203Scognet aExp = 1; 3708129203Scognet bExp = 1; 3709129203Scognet } 3710129203Scognet zSig1 = 0; 3711129203Scognet if ( bSig < aSig ) goto aBigger; 3712129203Scognet if ( aSig < bSig ) goto bBigger; 3713129203Scognet return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); 3714129203Scognet bExpBigger: 3715129203Scognet if ( bExp == 0x7FFF ) { 3716129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3717129203Scognet return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3718129203Scognet } 3719129203Scognet if ( aExp == 0 ) ++expDiff; 3720129203Scognet shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3721129203Scognet bBigger: 3722129203Scognet sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); 3723129203Scognet zExp = bExp; 3724129203Scognet zSign ^= 1; 3725129203Scognet goto normalizeRoundAndPack; 3726129203Scognet aExpBigger: 3727129203Scognet if ( aExp == 0x7FFF ) { 3728129203Scognet if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3729129203Scognet return a; 3730129203Scognet } 3731129203Scognet if ( bExp == 0 ) --expDiff; 3732129203Scognet shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3733129203Scognet aBigger: 3734129203Scognet sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); 3735129203Scognet zExp = aExp; 3736129203Scognet normalizeRoundAndPack: 3737129203Scognet return 3738129203Scognet normalizeRoundAndPackFloatx80( 3739129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3740129203Scognet 3741129203Scognet} 3742129203Scognet 3743129203Scognet/* 3744129203Scognet------------------------------------------------------------------------------- 3745129203ScognetReturns the result of adding the extended double-precision floating-point 3746129203Scognetvalues `a' and `b'. The operation is performed according to the IEC/IEEE 3747129203ScognetStandard for Binary Floating-Point Arithmetic. 3748129203Scognet------------------------------------------------------------------------------- 3749129203Scognet*/ 3750129203Scognetfloatx80 floatx80_add( floatx80 a, floatx80 b ) 3751129203Scognet{ 3752129203Scognet flag aSign, bSign; 3753129203Scognet 3754129203Scognet aSign = extractFloatx80Sign( a ); 3755129203Scognet bSign = extractFloatx80Sign( b ); 3756129203Scognet if ( aSign == bSign ) { 3757129203Scognet return addFloatx80Sigs( a, b, aSign ); 3758129203Scognet } 3759129203Scognet else { 3760129203Scognet return subFloatx80Sigs( a, b, aSign ); 3761129203Scognet } 3762129203Scognet 3763129203Scognet} 3764129203Scognet 3765129203Scognet/* 3766129203Scognet------------------------------------------------------------------------------- 3767129203ScognetReturns the result of subtracting the extended double-precision floating- 3768129203Scognetpoint values `a' and `b'. The operation is performed according to the 3769129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 3770129203Scognet------------------------------------------------------------------------------- 3771129203Scognet*/ 3772129203Scognetfloatx80 floatx80_sub( floatx80 a, floatx80 b ) 3773129203Scognet{ 3774129203Scognet flag aSign, bSign; 3775129203Scognet 3776129203Scognet aSign = extractFloatx80Sign( a ); 3777129203Scognet bSign = extractFloatx80Sign( b ); 3778129203Scognet if ( aSign == bSign ) { 3779129203Scognet return subFloatx80Sigs( a, b, aSign ); 3780129203Scognet } 3781129203Scognet else { 3782129203Scognet return addFloatx80Sigs( a, b, aSign ); 3783129203Scognet } 3784129203Scognet 3785129203Scognet} 3786129203Scognet 3787129203Scognet/* 3788129203Scognet------------------------------------------------------------------------------- 3789129203ScognetReturns the result of multiplying the extended double-precision floating- 3790129203Scognetpoint values `a' and `b'. The operation is performed according to the 3791129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 3792129203Scognet------------------------------------------------------------------------------- 3793129203Scognet*/ 3794129203Scognetfloatx80 floatx80_mul( floatx80 a, floatx80 b ) 3795129203Scognet{ 3796129203Scognet flag aSign, bSign, zSign; 3797129203Scognet int32 aExp, bExp, zExp; 3798129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3799129203Scognet floatx80 z; 3800129203Scognet 3801129203Scognet aSig = extractFloatx80Frac( a ); 3802129203Scognet aExp = extractFloatx80Exp( a ); 3803129203Scognet aSign = extractFloatx80Sign( a ); 3804129203Scognet bSig = extractFloatx80Frac( b ); 3805129203Scognet bExp = extractFloatx80Exp( b ); 3806129203Scognet bSign = extractFloatx80Sign( b ); 3807129203Scognet zSign = aSign ^ bSign; 3808129203Scognet if ( aExp == 0x7FFF ) { 3809129203Scognet if ( (bits64) ( aSig<<1 ) 3810129203Scognet || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3811129203Scognet return propagateFloatx80NaN( a, b ); 3812129203Scognet } 3813129203Scognet if ( ( bExp | bSig ) == 0 ) goto invalid; 3814129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3815129203Scognet } 3816129203Scognet if ( bExp == 0x7FFF ) { 3817129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3818129203Scognet if ( ( aExp | aSig ) == 0 ) { 3819129203Scognet invalid: 3820129203Scognet float_raise( float_flag_invalid ); 3821129203Scognet z.low = floatx80_default_nan_low; 3822129203Scognet z.high = floatx80_default_nan_high; 3823129203Scognet return z; 3824129203Scognet } 3825129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3826129203Scognet } 3827129203Scognet if ( aExp == 0 ) { 3828129203Scognet if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3829129203Scognet normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3830129203Scognet } 3831129203Scognet if ( bExp == 0 ) { 3832129203Scognet if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3833129203Scognet normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3834129203Scognet } 3835129203Scognet zExp = aExp + bExp - 0x3FFE; 3836129203Scognet mul64To128( aSig, bSig, &zSig0, &zSig1 ); 3837129203Scognet if ( 0 < (sbits64) zSig0 ) { 3838129203Scognet shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3839129203Scognet --zExp; 3840129203Scognet } 3841129203Scognet return 3842129203Scognet roundAndPackFloatx80( 3843129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3844129203Scognet 3845129203Scognet} 3846129203Scognet 3847129203Scognet/* 3848129203Scognet------------------------------------------------------------------------------- 3849129203ScognetReturns the result of dividing the extended double-precision floating-point 3850129203Scognetvalue `a' by the corresponding value `b'. The operation is performed 3851129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3852129203Scognet------------------------------------------------------------------------------- 3853129203Scognet*/ 3854129203Scognetfloatx80 floatx80_div( floatx80 a, floatx80 b ) 3855129203Scognet{ 3856129203Scognet flag aSign, bSign, zSign; 3857129203Scognet int32 aExp, bExp, zExp; 3858129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3859129203Scognet bits64 rem0, rem1, rem2, term0, term1, term2; 3860129203Scognet floatx80 z; 3861129203Scognet 3862129203Scognet aSig = extractFloatx80Frac( a ); 3863129203Scognet aExp = extractFloatx80Exp( a ); 3864129203Scognet aSign = extractFloatx80Sign( a ); 3865129203Scognet bSig = extractFloatx80Frac( b ); 3866129203Scognet bExp = extractFloatx80Exp( b ); 3867129203Scognet bSign = extractFloatx80Sign( b ); 3868129203Scognet zSign = aSign ^ bSign; 3869129203Scognet if ( aExp == 0x7FFF ) { 3870129203Scognet if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3871129203Scognet if ( bExp == 0x7FFF ) { 3872129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3873129203Scognet goto invalid; 3874129203Scognet } 3875129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3876129203Scognet } 3877129203Scognet if ( bExp == 0x7FFF ) { 3878129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3879129203Scognet return packFloatx80( zSign, 0, 0 ); 3880129203Scognet } 3881129203Scognet if ( bExp == 0 ) { 3882129203Scognet if ( bSig == 0 ) { 3883129203Scognet if ( ( aExp | aSig ) == 0 ) { 3884129203Scognet invalid: 3885129203Scognet float_raise( float_flag_invalid ); 3886129203Scognet z.low = floatx80_default_nan_low; 3887129203Scognet z.high = floatx80_default_nan_high; 3888129203Scognet return z; 3889129203Scognet } 3890129203Scognet float_raise( float_flag_divbyzero ); 3891129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3892129203Scognet } 3893129203Scognet normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3894129203Scognet } 3895129203Scognet if ( aExp == 0 ) { 3896129203Scognet if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3897129203Scognet normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3898129203Scognet } 3899129203Scognet zExp = aExp - bExp + 0x3FFE; 3900129203Scognet rem1 = 0; 3901129203Scognet if ( bSig <= aSig ) { 3902129203Scognet shift128Right( aSig, 0, 1, &aSig, &rem1 ); 3903129203Scognet ++zExp; 3904129203Scognet } 3905129203Scognet zSig0 = estimateDiv128To64( aSig, rem1, bSig ); 3906129203Scognet mul64To128( bSig, zSig0, &term0, &term1 ); 3907129203Scognet sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); 3908129203Scognet while ( (sbits64) rem0 < 0 ) { 3909129203Scognet --zSig0; 3910129203Scognet add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 3911129203Scognet } 3912129203Scognet zSig1 = estimateDiv128To64( rem1, 0, bSig ); 3913129203Scognet if ( (bits64) ( zSig1<<1 ) <= 8 ) { 3914129203Scognet mul64To128( bSig, zSig1, &term1, &term2 ); 3915129203Scognet sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 3916129203Scognet while ( (sbits64) rem1 < 0 ) { 3917129203Scognet --zSig1; 3918129203Scognet add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); 3919129203Scognet } 3920129203Scognet zSig1 |= ( ( rem1 | rem2 ) != 0 ); 3921129203Scognet } 3922129203Scognet return 3923129203Scognet roundAndPackFloatx80( 3924129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3925129203Scognet 3926129203Scognet} 3927129203Scognet 3928129203Scognet/* 3929129203Scognet------------------------------------------------------------------------------- 3930129203ScognetReturns the remainder of the extended double-precision floating-point value 3931129203Scognet`a' with respect to the corresponding value `b'. The operation is performed 3932129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3933129203Scognet------------------------------------------------------------------------------- 3934129203Scognet*/ 3935129203Scognetfloatx80 floatx80_rem( floatx80 a, floatx80 b ) 3936129203Scognet{ 3937129203Scognet flag aSign, bSign, zSign; 3938129203Scognet int32 aExp, bExp, expDiff; 3939129203Scognet bits64 aSig0, aSig1, bSig; 3940129203Scognet bits64 q, term0, term1, alternateASig0, alternateASig1; 3941129203Scognet floatx80 z; 3942129203Scognet 3943129203Scognet aSig0 = extractFloatx80Frac( a ); 3944129203Scognet aExp = extractFloatx80Exp( a ); 3945129203Scognet aSign = extractFloatx80Sign( a ); 3946129203Scognet bSig = extractFloatx80Frac( b ); 3947129203Scognet bExp = extractFloatx80Exp( b ); 3948129203Scognet bSign = extractFloatx80Sign( b ); 3949129203Scognet if ( aExp == 0x7FFF ) { 3950129203Scognet if ( (bits64) ( aSig0<<1 ) 3951129203Scognet || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3952129203Scognet return propagateFloatx80NaN( a, b ); 3953129203Scognet } 3954129203Scognet goto invalid; 3955129203Scognet } 3956129203Scognet if ( bExp == 0x7FFF ) { 3957129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3958129203Scognet return a; 3959129203Scognet } 3960129203Scognet if ( bExp == 0 ) { 3961129203Scognet if ( bSig == 0 ) { 3962129203Scognet invalid: 3963129203Scognet float_raise( float_flag_invalid ); 3964129203Scognet z.low = floatx80_default_nan_low; 3965129203Scognet z.high = floatx80_default_nan_high; 3966129203Scognet return z; 3967129203Scognet } 3968129203Scognet normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3969129203Scognet } 3970129203Scognet if ( aExp == 0 ) { 3971129203Scognet if ( (bits64) ( aSig0<<1 ) == 0 ) return a; 3972129203Scognet normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 3973129203Scognet } 3974129203Scognet bSig |= LIT64( 0x8000000000000000 ); 3975129203Scognet zSign = aSign; 3976129203Scognet expDiff = aExp - bExp; 3977129203Scognet aSig1 = 0; 3978129203Scognet if ( expDiff < 0 ) { 3979129203Scognet if ( expDiff < -1 ) return a; 3980129203Scognet shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); 3981129203Scognet expDiff = 0; 3982129203Scognet } 3983129203Scognet q = ( bSig <= aSig0 ); 3984129203Scognet if ( q ) aSig0 -= bSig; 3985129203Scognet expDiff -= 64; 3986129203Scognet while ( 0 < expDiff ) { 3987129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig ); 3988129203Scognet q = ( 2 < q ) ? q - 2 : 0; 3989129203Scognet mul64To128( bSig, q, &term0, &term1 ); 3990129203Scognet sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 3991129203Scognet shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); 3992129203Scognet expDiff -= 62; 3993129203Scognet } 3994129203Scognet expDiff += 64; 3995129203Scognet if ( 0 < expDiff ) { 3996129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig ); 3997129203Scognet q = ( 2 < q ) ? q - 2 : 0; 3998129203Scognet q >>= 64 - expDiff; 3999129203Scognet mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); 4000129203Scognet sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 4001129203Scognet shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); 4002129203Scognet while ( le128( term0, term1, aSig0, aSig1 ) ) { 4003129203Scognet ++q; 4004129203Scognet sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 4005129203Scognet } 4006129203Scognet } 4007129203Scognet else { 4008129203Scognet term1 = 0; 4009129203Scognet term0 = bSig; 4010129203Scognet } 4011129203Scognet sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); 4012129203Scognet if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) 4013129203Scognet || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) 4014129203Scognet && ( q & 1 ) ) 4015129203Scognet ) { 4016129203Scognet aSig0 = alternateASig0; 4017129203Scognet aSig1 = alternateASig1; 4018129203Scognet zSign = ! zSign; 4019129203Scognet } 4020129203Scognet return 4021129203Scognet normalizeRoundAndPackFloatx80( 4022129203Scognet 80, zSign, bExp + expDiff, aSig0, aSig1 ); 4023129203Scognet 4024129203Scognet} 4025129203Scognet 4026129203Scognet/* 4027129203Scognet------------------------------------------------------------------------------- 4028129203ScognetReturns the square root of the extended double-precision floating-point 4029129203Scognetvalue `a'. The operation is performed according to the IEC/IEEE Standard 4030129203Scognetfor Binary Floating-Point Arithmetic. 4031129203Scognet------------------------------------------------------------------------------- 4032129203Scognet*/ 4033129203Scognetfloatx80 floatx80_sqrt( floatx80 a ) 4034129203Scognet{ 4035129203Scognet flag aSign; 4036129203Scognet int32 aExp, zExp; 4037129203Scognet bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; 4038129203Scognet bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 4039129203Scognet floatx80 z; 4040129203Scognet 4041129203Scognet aSig0 = extractFloatx80Frac( a ); 4042129203Scognet aExp = extractFloatx80Exp( a ); 4043129203Scognet aSign = extractFloatx80Sign( a ); 4044129203Scognet if ( aExp == 0x7FFF ) { 4045129203Scognet if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); 4046129203Scognet if ( ! aSign ) return a; 4047129203Scognet goto invalid; 4048129203Scognet } 4049129203Scognet if ( aSign ) { 4050129203Scognet if ( ( aExp | aSig0 ) == 0 ) return a; 4051129203Scognet invalid: 4052129203Scognet float_raise( float_flag_invalid ); 4053129203Scognet z.low = floatx80_default_nan_low; 4054129203Scognet z.high = floatx80_default_nan_high; 4055129203Scognet return z; 4056129203Scognet } 4057129203Scognet if ( aExp == 0 ) { 4058129203Scognet if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); 4059129203Scognet normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 4060129203Scognet } 4061129203Scognet zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; 4062129203Scognet zSig0 = estimateSqrt32( aExp, aSig0>>32 ); 4063129203Scognet shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); 4064129203Scognet zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 4065129203Scognet doubleZSig0 = zSig0<<1; 4066129203Scognet mul64To128( zSig0, zSig0, &term0, &term1 ); 4067129203Scognet sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 4068129203Scognet while ( (sbits64) rem0 < 0 ) { 4069129203Scognet --zSig0; 4070129203Scognet doubleZSig0 -= 2; 4071129203Scognet add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 4072129203Scognet } 4073129203Scognet zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 4074129203Scognet if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { 4075129203Scognet if ( zSig1 == 0 ) zSig1 = 1; 4076129203Scognet mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 4077129203Scognet sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 4078129203Scognet mul64To128( zSig1, zSig1, &term2, &term3 ); 4079129203Scognet sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 4080129203Scognet while ( (sbits64) rem1 < 0 ) { 4081129203Scognet --zSig1; 4082129203Scognet shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 4083129203Scognet term3 |= 1; 4084129203Scognet term2 |= doubleZSig0; 4085129203Scognet add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 4086129203Scognet } 4087129203Scognet zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 4088129203Scognet } 4089129203Scognet shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); 4090129203Scognet zSig0 |= doubleZSig0; 4091129203Scognet return 4092129203Scognet roundAndPackFloatx80( 4093129203Scognet floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); 4094129203Scognet 4095129203Scognet} 4096129203Scognet 4097129203Scognet/* 4098129203Scognet------------------------------------------------------------------------------- 4099129203ScognetReturns 1 if the extended double-precision floating-point value `a' is 4100129203Scognetequal to the corresponding value `b', and 0 otherwise. The comparison is 4101129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 4102129203ScognetArithmetic. 4103129203Scognet------------------------------------------------------------------------------- 4104129203Scognet*/ 4105129203Scognetflag floatx80_eq( floatx80 a, floatx80 b ) 4106129203Scognet{ 4107129203Scognet 4108129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4109129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4110129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4111129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4112129203Scognet ) { 4113129203Scognet if ( floatx80_is_signaling_nan( a ) 4114129203Scognet || floatx80_is_signaling_nan( b ) ) { 4115129203Scognet float_raise( float_flag_invalid ); 4116129203Scognet } 4117129203Scognet return 0; 4118129203Scognet } 4119129203Scognet return 4120129203Scognet ( a.low == b.low ) 4121129203Scognet && ( ( a.high == b.high ) 4122129203Scognet || ( ( a.low == 0 ) 4123129203Scognet && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 4124129203Scognet ); 4125129203Scognet 4126129203Scognet} 4127129203Scognet 4128129203Scognet/* 4129129203Scognet------------------------------------------------------------------------------- 4130129203ScognetReturns 1 if the extended double-precision floating-point value `a' is 4131129203Scognetless than or equal to the corresponding value `b', and 0 otherwise. The 4132129203Scognetcomparison is performed according to the IEC/IEEE Standard for Binary 4133129203ScognetFloating-Point Arithmetic. 4134129203Scognet------------------------------------------------------------------------------- 4135129203Scognet*/ 4136129203Scognetflag floatx80_le( floatx80 a, floatx80 b ) 4137129203Scognet{ 4138129203Scognet flag aSign, bSign; 4139129203Scognet 4140129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4141129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4142129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4143129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4144129203Scognet ) { 4145129203Scognet float_raise( float_flag_invalid ); 4146129203Scognet return 0; 4147129203Scognet } 4148129203Scognet aSign = extractFloatx80Sign( a ); 4149129203Scognet bSign = extractFloatx80Sign( b ); 4150129203Scognet if ( aSign != bSign ) { 4151129203Scognet return 4152129203Scognet aSign 4153129203Scognet || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4154129203Scognet == 0 ); 4155129203Scognet } 4156129203Scognet return 4157129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 4158129203Scognet : le128( a.high, a.low, b.high, b.low ); 4159129203Scognet 4160129203Scognet} 4161129203Scognet 4162129203Scognet/* 4163129203Scognet------------------------------------------------------------------------------- 4164129203ScognetReturns 1 if the extended double-precision floating-point value `a' is 4165129203Scognetless than the corresponding value `b', and 0 otherwise. The comparison 4166129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4167129203ScognetArithmetic. 4168129203Scognet------------------------------------------------------------------------------- 4169129203Scognet*/ 4170129203Scognetflag floatx80_lt( floatx80 a, floatx80 b ) 4171129203Scognet{ 4172129203Scognet flag aSign, bSign; 4173129203Scognet 4174129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4175129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4176129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4177129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4178129203Scognet ) { 4179129203Scognet float_raise( float_flag_invalid ); 4180129203Scognet return 0; 4181129203Scognet } 4182129203Scognet aSign = extractFloatx80Sign( a ); 4183129203Scognet bSign = extractFloatx80Sign( b ); 4184129203Scognet if ( aSign != bSign ) { 4185129203Scognet return 4186129203Scognet aSign 4187129203Scognet && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4188129203Scognet != 0 ); 4189129203Scognet } 4190129203Scognet return 4191129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 4192129203Scognet : lt128( a.high, a.low, b.high, b.low ); 4193129203Scognet 4194129203Scognet} 4195129203Scognet 4196129203Scognet/* 4197129203Scognet------------------------------------------------------------------------------- 4198129203ScognetReturns 1 if the extended double-precision floating-point value `a' is equal 4199129203Scognetto the corresponding value `b', and 0 otherwise. The invalid exception is 4200129203Scognetraised if either operand is a NaN. Otherwise, the comparison is performed 4201129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4202129203Scognet------------------------------------------------------------------------------- 4203129203Scognet*/ 4204129203Scognetflag floatx80_eq_signaling( floatx80 a, floatx80 b ) 4205129203Scognet{ 4206129203Scognet 4207129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4208129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4209129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4210129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4211129203Scognet ) { 4212129203Scognet float_raise( float_flag_invalid ); 4213129203Scognet return 0; 4214129203Scognet } 4215129203Scognet return 4216129203Scognet ( a.low == b.low ) 4217129203Scognet && ( ( a.high == b.high ) 4218129203Scognet || ( ( a.low == 0 ) 4219129203Scognet && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 4220129203Scognet ); 4221129203Scognet 4222129203Scognet} 4223129203Scognet 4224129203Scognet/* 4225129203Scognet------------------------------------------------------------------------------- 4226129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less 4227129203Scognetthan or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs 4228129203Scognetdo not cause an exception. Otherwise, the comparison is performed according 4229129203Scognetto the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4230129203Scognet------------------------------------------------------------------------------- 4231129203Scognet*/ 4232129203Scognetflag floatx80_le_quiet( floatx80 a, floatx80 b ) 4233129203Scognet{ 4234129203Scognet flag aSign, bSign; 4235129203Scognet 4236129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4237129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4238129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4239129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4240129203Scognet ) { 4241129203Scognet if ( floatx80_is_signaling_nan( a ) 4242129203Scognet || floatx80_is_signaling_nan( b ) ) { 4243129203Scognet float_raise( float_flag_invalid ); 4244129203Scognet } 4245129203Scognet return 0; 4246129203Scognet } 4247129203Scognet aSign = extractFloatx80Sign( a ); 4248129203Scognet bSign = extractFloatx80Sign( b ); 4249129203Scognet if ( aSign != bSign ) { 4250129203Scognet return 4251129203Scognet aSign 4252129203Scognet || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4253129203Scognet == 0 ); 4254129203Scognet } 4255129203Scognet return 4256129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 4257129203Scognet : le128( a.high, a.low, b.high, b.low ); 4258129203Scognet 4259129203Scognet} 4260129203Scognet 4261129203Scognet/* 4262129203Scognet------------------------------------------------------------------------------- 4263129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less 4264129203Scognetthan the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause 4265129203Scognetan exception. Otherwise, the comparison is performed according to the 4266129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 4267129203Scognet------------------------------------------------------------------------------- 4268129203Scognet*/ 4269129203Scognetflag floatx80_lt_quiet( floatx80 a, floatx80 b ) 4270129203Scognet{ 4271129203Scognet flag aSign, bSign; 4272129203Scognet 4273129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4274129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4275129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4276129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4277129203Scognet ) { 4278129203Scognet if ( floatx80_is_signaling_nan( a ) 4279129203Scognet || floatx80_is_signaling_nan( b ) ) { 4280129203Scognet float_raise( float_flag_invalid ); 4281129203Scognet } 4282129203Scognet return 0; 4283129203Scognet } 4284129203Scognet aSign = extractFloatx80Sign( a ); 4285129203Scognet bSign = extractFloatx80Sign( b ); 4286129203Scognet if ( aSign != bSign ) { 4287129203Scognet return 4288129203Scognet aSign 4289129203Scognet && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4290129203Scognet != 0 ); 4291129203Scognet } 4292129203Scognet return 4293129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 4294129203Scognet : lt128( a.high, a.low, b.high, b.low ); 4295129203Scognet 4296129203Scognet} 4297129203Scognet 4298129203Scognet#endif 4299129203Scognet 4300129203Scognet#ifdef FLOAT128 4301129203Scognet 4302129203Scognet/* 4303129203Scognet------------------------------------------------------------------------------- 4304129203ScognetReturns the result of converting the quadruple-precision floating-point 4305129203Scognetvalue `a' to the 32-bit two's complement integer format. The conversion 4306129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4307129203ScognetArithmetic---which means in particular that the conversion is rounded 4308129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 4309129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 4310129203Scognetlargest integer with the same sign as `a' is returned. 4311129203Scognet------------------------------------------------------------------------------- 4312129203Scognet*/ 4313129203Scognetint32 float128_to_int32( float128 a ) 4314129203Scognet{ 4315129203Scognet flag aSign; 4316129203Scognet int32 aExp, shiftCount; 4317129203Scognet bits64 aSig0, aSig1; 4318129203Scognet 4319129203Scognet aSig1 = extractFloat128Frac1( a ); 4320129203Scognet aSig0 = extractFloat128Frac0( a ); 4321129203Scognet aExp = extractFloat128Exp( a ); 4322129203Scognet aSign = extractFloat128Sign( a ); 4323129203Scognet if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; 4324129203Scognet if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4325129203Scognet aSig0 |= ( aSig1 != 0 ); 4326129203Scognet shiftCount = 0x4028 - aExp; 4327129203Scognet if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); 4328129203Scognet return roundAndPackInt32( aSign, aSig0 ); 4329129203Scognet 4330129203Scognet} 4331129203Scognet 4332129203Scognet/* 4333129203Scognet------------------------------------------------------------------------------- 4334129203ScognetReturns the result of converting the quadruple-precision floating-point 4335129203Scognetvalue `a' to the 32-bit two's complement integer format. The conversion 4336129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4337129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 4338129203Scognet`a' is a NaN, the largest positive integer is returned. Otherwise, if the 4339129203Scognetconversion overflows, the largest integer with the same sign as `a' is 4340129203Scognetreturned. 4341129203Scognet------------------------------------------------------------------------------- 4342129203Scognet*/ 4343129203Scognetint32 float128_to_int32_round_to_zero( float128 a ) 4344129203Scognet{ 4345129203Scognet flag aSign; 4346129203Scognet int32 aExp, shiftCount; 4347129203Scognet bits64 aSig0, aSig1, savedASig; 4348129203Scognet int32 z; 4349129203Scognet 4350129203Scognet aSig1 = extractFloat128Frac1( a ); 4351129203Scognet aSig0 = extractFloat128Frac0( a ); 4352129203Scognet aExp = extractFloat128Exp( a ); 4353129203Scognet aSign = extractFloat128Sign( a ); 4354129203Scognet aSig0 |= ( aSig1 != 0 ); 4355129203Scognet if ( 0x401E < aExp ) { 4356129203Scognet if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; 4357129203Scognet goto invalid; 4358129203Scognet } 4359129203Scognet else if ( aExp < 0x3FFF ) { 4360129203Scognet if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact; 4361129203Scognet return 0; 4362129203Scognet } 4363129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4364129203Scognet shiftCount = 0x402F - aExp; 4365129203Scognet savedASig = aSig0; 4366129203Scognet aSig0 >>= shiftCount; 4367129203Scognet z = aSig0; 4368129203Scognet if ( aSign ) z = - z; 4369129203Scognet if ( ( z < 0 ) ^ aSign ) { 4370129203Scognet invalid: 4371129203Scognet float_raise( float_flag_invalid ); 4372129203Scognet return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 4373129203Scognet } 4374129203Scognet if ( ( aSig0<<shiftCount ) != savedASig ) { 4375129203Scognet float_exception_flags |= float_flag_inexact; 4376129203Scognet } 4377129203Scognet return z; 4378129203Scognet 4379129203Scognet} 4380129203Scognet 4381129203Scognet/* 4382129203Scognet------------------------------------------------------------------------------- 4383129203ScognetReturns the result of converting the quadruple-precision floating-point 4384129203Scognetvalue `a' to the 64-bit two's complement integer format. The conversion 4385129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4386129203ScognetArithmetic---which means in particular that the conversion is rounded 4387129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 4388129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 4389129203Scognetlargest integer with the same sign as `a' is returned. 4390129203Scognet------------------------------------------------------------------------------- 4391129203Scognet*/ 4392129203Scognetint64 float128_to_int64( float128 a ) 4393129203Scognet{ 4394129203Scognet flag aSign; 4395129203Scognet int32 aExp, shiftCount; 4396129203Scognet bits64 aSig0, aSig1; 4397129203Scognet 4398129203Scognet aSig1 = extractFloat128Frac1( a ); 4399129203Scognet aSig0 = extractFloat128Frac0( a ); 4400129203Scognet aExp = extractFloat128Exp( a ); 4401129203Scognet aSign = extractFloat128Sign( a ); 4402129203Scognet if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4403129203Scognet shiftCount = 0x402F - aExp; 4404129203Scognet if ( shiftCount <= 0 ) { 4405129203Scognet if ( 0x403E < aExp ) { 4406129203Scognet float_raise( float_flag_invalid ); 4407129203Scognet if ( ! aSign 4408129203Scognet || ( ( aExp == 0x7FFF ) 4409129203Scognet && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) 4410129203Scognet ) 4411129203Scognet ) { 4412129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 4413129203Scognet } 4414129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 4415129203Scognet } 4416129203Scognet shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); 4417129203Scognet } 4418129203Scognet else { 4419129203Scognet shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); 4420129203Scognet } 4421129203Scognet return roundAndPackInt64( aSign, aSig0, aSig1 ); 4422129203Scognet 4423129203Scognet} 4424129203Scognet 4425129203Scognet/* 4426129203Scognet------------------------------------------------------------------------------- 4427129203ScognetReturns the result of converting the quadruple-precision floating-point 4428129203Scognetvalue `a' to the 64-bit two's complement integer format. The conversion 4429129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4430129203ScognetArithmetic, except that the conversion is always rounded toward zero. 4431129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 4432129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 4433129203Scognetreturned. 4434129203Scognet------------------------------------------------------------------------------- 4435129203Scognet*/ 4436129203Scognetint64 float128_to_int64_round_to_zero( float128 a ) 4437129203Scognet{ 4438129203Scognet flag aSign; 4439129203Scognet int32 aExp, shiftCount; 4440129203Scognet bits64 aSig0, aSig1; 4441129203Scognet int64 z; 4442129203Scognet 4443129203Scognet aSig1 = extractFloat128Frac1( a ); 4444129203Scognet aSig0 = extractFloat128Frac0( a ); 4445129203Scognet aExp = extractFloat128Exp( a ); 4446129203Scognet aSign = extractFloat128Sign( a ); 4447129203Scognet if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4448129203Scognet shiftCount = aExp - 0x402F; 4449129203Scognet if ( 0 < shiftCount ) { 4450129203Scognet if ( 0x403E <= aExp ) { 4451129203Scognet aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); 4452129203Scognet if ( ( a.high == LIT64( 0xC03E000000000000 ) ) 4453129203Scognet && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { 4454129203Scognet if ( aSig1 ) float_exception_flags |= float_flag_inexact; 4455129203Scognet } 4456129203Scognet else { 4457129203Scognet float_raise( float_flag_invalid ); 4458129203Scognet if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { 4459129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 4460129203Scognet } 4461129203Scognet } 4462129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 4463129203Scognet } 4464129203Scognet z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); 4465129203Scognet if ( (bits64) ( aSig1<<shiftCount ) ) { 4466129203Scognet float_exception_flags |= float_flag_inexact; 4467129203Scognet } 4468129203Scognet } 4469129203Scognet else { 4470129203Scognet if ( aExp < 0x3FFF ) { 4471129203Scognet if ( aExp | aSig0 | aSig1 ) { 4472129203Scognet float_exception_flags |= float_flag_inexact; 4473129203Scognet } 4474129203Scognet return 0; 4475129203Scognet } 4476129203Scognet z = aSig0>>( - shiftCount ); 4477129203Scognet if ( aSig1 4478129203Scognet || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { 4479129203Scognet float_exception_flags |= float_flag_inexact; 4480129203Scognet } 4481129203Scognet } 4482129203Scognet if ( aSign ) z = - z; 4483129203Scognet return z; 4484129203Scognet 4485129203Scognet} 4486129203Scognet 4487230363Sdas#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \ 4488230363Sdas && defined(SOFTFLOAT_NEED_FIXUNS) 4489129203Scognet/* 4490230363Sdas * just like above - but do not care for overflow of signed results 4491230363Sdas */ 4492230363Sdasuint64 float128_to_uint64_round_to_zero( float128 a ) 4493230363Sdas{ 4494230363Sdas flag aSign; 4495230363Sdas int32 aExp, shiftCount; 4496230363Sdas bits64 aSig0, aSig1; 4497230363Sdas uint64 z; 4498230363Sdas 4499230363Sdas aSig1 = extractFloat128Frac1( a ); 4500230363Sdas aSig0 = extractFloat128Frac0( a ); 4501230363Sdas aExp = extractFloat128Exp( a ); 4502230363Sdas aSign = extractFloat128Sign( a ); 4503230363Sdas if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4504230363Sdas shiftCount = aExp - 0x402F; 4505230363Sdas if ( 0 < shiftCount ) { 4506230363Sdas if ( 0x403F <= aExp ) { 4507230363Sdas aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); 4508230363Sdas if ( ( a.high == LIT64( 0xC03E000000000000 ) ) 4509230363Sdas && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { 4510230363Sdas if ( aSig1 ) float_exception_flags |= float_flag_inexact; 4511230363Sdas } 4512230363Sdas else { 4513230363Sdas float_raise( float_flag_invalid ); 4514230363Sdas } 4515230363Sdas return LIT64( 0xFFFFFFFFFFFFFFFF ); 4516230363Sdas } 4517230363Sdas z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); 4518230363Sdas if ( (bits64) ( aSig1<<shiftCount ) ) { 4519230363Sdas float_exception_flags |= float_flag_inexact; 4520230363Sdas } 4521230363Sdas } 4522230363Sdas else { 4523230363Sdas if ( aExp < 0x3FFF ) { 4524230363Sdas if ( aExp | aSig0 | aSig1 ) { 4525230363Sdas float_exception_flags |= float_flag_inexact; 4526230363Sdas } 4527230363Sdas return 0; 4528230363Sdas } 4529230363Sdas z = aSig0>>( - shiftCount ); 4530230363Sdas if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { 4531230363Sdas float_exception_flags |= float_flag_inexact; 4532230363Sdas } 4533230363Sdas } 4534230363Sdas if ( aSign ) z = - z; 4535230363Sdas return z; 4536230363Sdas 4537230363Sdas} 4538230363Sdas#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */ 4539230363Sdas 4540230363Sdas/* 4541129203Scognet------------------------------------------------------------------------------- 4542129203ScognetReturns the result of converting the quadruple-precision floating-point 4543129203Scognetvalue `a' to the single-precision floating-point format. The conversion 4544129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4545129203ScognetArithmetic. 4546129203Scognet------------------------------------------------------------------------------- 4547129203Scognet*/ 4548129203Scognetfloat32 float128_to_float32( float128 a ) 4549129203Scognet{ 4550129203Scognet flag aSign; 4551129203Scognet int32 aExp; 4552129203Scognet bits64 aSig0, aSig1; 4553129203Scognet bits32 zSig; 4554129203Scognet 4555129203Scognet aSig1 = extractFloat128Frac1( a ); 4556129203Scognet aSig0 = extractFloat128Frac0( a ); 4557129203Scognet aExp = extractFloat128Exp( a ); 4558129203Scognet aSign = extractFloat128Sign( a ); 4559129203Scognet if ( aExp == 0x7FFF ) { 4560129203Scognet if ( aSig0 | aSig1 ) { 4561129203Scognet return commonNaNToFloat32( float128ToCommonNaN( a ) ); 4562129203Scognet } 4563129203Scognet return packFloat32( aSign, 0xFF, 0 ); 4564129203Scognet } 4565129203Scognet aSig0 |= ( aSig1 != 0 ); 4566129203Scognet shift64RightJamming( aSig0, 18, &aSig0 ); 4567129203Scognet zSig = aSig0; 4568129203Scognet if ( aExp || zSig ) { 4569129203Scognet zSig |= 0x40000000; 4570129203Scognet aExp -= 0x3F81; 4571129203Scognet } 4572129203Scognet return roundAndPackFloat32( aSign, aExp, zSig ); 4573129203Scognet 4574129203Scognet} 4575129203Scognet 4576129203Scognet/* 4577129203Scognet------------------------------------------------------------------------------- 4578129203ScognetReturns the result of converting the quadruple-precision floating-point 4579129203Scognetvalue `a' to the double-precision floating-point format. The conversion 4580129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4581129203ScognetArithmetic. 4582129203Scognet------------------------------------------------------------------------------- 4583129203Scognet*/ 4584129203Scognetfloat64 float128_to_float64( float128 a ) 4585129203Scognet{ 4586129203Scognet flag aSign; 4587129203Scognet int32 aExp; 4588129203Scognet bits64 aSig0, aSig1; 4589129203Scognet 4590129203Scognet aSig1 = extractFloat128Frac1( a ); 4591129203Scognet aSig0 = extractFloat128Frac0( a ); 4592129203Scognet aExp = extractFloat128Exp( a ); 4593129203Scognet aSign = extractFloat128Sign( a ); 4594129203Scognet if ( aExp == 0x7FFF ) { 4595129203Scognet if ( aSig0 | aSig1 ) { 4596129203Scognet return commonNaNToFloat64( float128ToCommonNaN( a ) ); 4597129203Scognet } 4598129203Scognet return packFloat64( aSign, 0x7FF, 0 ); 4599129203Scognet } 4600129203Scognet shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4601129203Scognet aSig0 |= ( aSig1 != 0 ); 4602129203Scognet if ( aExp || aSig0 ) { 4603129203Scognet aSig0 |= LIT64( 0x4000000000000000 ); 4604129203Scognet aExp -= 0x3C01; 4605129203Scognet } 4606129203Scognet return roundAndPackFloat64( aSign, aExp, aSig0 ); 4607129203Scognet 4608129203Scognet} 4609129203Scognet 4610129203Scognet#ifdef FLOATX80 4611129203Scognet 4612129203Scognet/* 4613129203Scognet------------------------------------------------------------------------------- 4614129203ScognetReturns the result of converting the quadruple-precision floating-point 4615129203Scognetvalue `a' to the extended double-precision floating-point format. The 4616129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 4617129203ScognetFloating-Point Arithmetic. 4618129203Scognet------------------------------------------------------------------------------- 4619129203Scognet*/ 4620129203Scognetfloatx80 float128_to_floatx80( float128 a ) 4621129203Scognet{ 4622129203Scognet flag aSign; 4623129203Scognet int32 aExp; 4624129203Scognet bits64 aSig0, aSig1; 4625129203Scognet 4626129203Scognet aSig1 = extractFloat128Frac1( a ); 4627129203Scognet aSig0 = extractFloat128Frac0( a ); 4628129203Scognet aExp = extractFloat128Exp( a ); 4629129203Scognet aSign = extractFloat128Sign( a ); 4630129203Scognet if ( aExp == 0x7FFF ) { 4631129203Scognet if ( aSig0 | aSig1 ) { 4632129203Scognet return commonNaNToFloatx80( float128ToCommonNaN( a ) ); 4633129203Scognet } 4634129203Scognet return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 4635129203Scognet } 4636129203Scognet if ( aExp == 0 ) { 4637129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); 4638129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 4639129203Scognet } 4640129203Scognet else { 4641129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4642129203Scognet } 4643129203Scognet shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); 4644129203Scognet return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); 4645129203Scognet 4646129203Scognet} 4647129203Scognet 4648129203Scognet#endif 4649129203Scognet 4650129203Scognet/* 4651129203Scognet------------------------------------------------------------------------------- 4652129203ScognetRounds the quadruple-precision floating-point value `a' to an integer, and 4653129203Scognetreturns the result as a quadruple-precision floating-point value. The 4654129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary 4655129203ScognetFloating-Point Arithmetic. 4656129203Scognet------------------------------------------------------------------------------- 4657129203Scognet*/ 4658129203Scognetfloat128 float128_round_to_int( float128 a ) 4659129203Scognet{ 4660129203Scognet flag aSign; 4661129203Scognet int32 aExp; 4662129203Scognet bits64 lastBitMask, roundBitsMask; 4663129203Scognet int8 roundingMode; 4664129203Scognet float128 z; 4665129203Scognet 4666129203Scognet aExp = extractFloat128Exp( a ); 4667129203Scognet if ( 0x402F <= aExp ) { 4668129203Scognet if ( 0x406F <= aExp ) { 4669129203Scognet if ( ( aExp == 0x7FFF ) 4670129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) 4671129203Scognet ) { 4672129203Scognet return propagateFloat128NaN( a, a ); 4673129203Scognet } 4674129203Scognet return a; 4675129203Scognet } 4676129203Scognet lastBitMask = 1; 4677129203Scognet lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; 4678129203Scognet roundBitsMask = lastBitMask - 1; 4679129203Scognet z = a; 4680129203Scognet roundingMode = float_rounding_mode; 4681129203Scognet if ( roundingMode == float_round_nearest_even ) { 4682129203Scognet if ( lastBitMask ) { 4683129203Scognet add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 4684129203Scognet if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 4685129203Scognet } 4686129203Scognet else { 4687129203Scognet if ( (sbits64) z.low < 0 ) { 4688129203Scognet ++z.high; 4689129203Scognet if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; 4690129203Scognet } 4691129203Scognet } 4692129203Scognet } 4693129203Scognet else if ( roundingMode != float_round_to_zero ) { 4694129203Scognet if ( extractFloat128Sign( z ) 4695129203Scognet ^ ( roundingMode == float_round_up ) ) { 4696129203Scognet add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 4697129203Scognet } 4698129203Scognet } 4699129203Scognet z.low &= ~ roundBitsMask; 4700129203Scognet } 4701129203Scognet else { 4702129203Scognet if ( aExp < 0x3FFF ) { 4703129203Scognet if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 4704129203Scognet float_exception_flags |= float_flag_inexact; 4705129203Scognet aSign = extractFloat128Sign( a ); 4706129203Scognet switch ( float_rounding_mode ) { 4707129203Scognet case float_round_nearest_even: 4708129203Scognet if ( ( aExp == 0x3FFE ) 4709129203Scognet && ( extractFloat128Frac0( a ) 4710129203Scognet | extractFloat128Frac1( a ) ) 4711129203Scognet ) { 4712129203Scognet return packFloat128( aSign, 0x3FFF, 0, 0 ); 4713129203Scognet } 4714129203Scognet break; 4715129203Scognet case float_round_to_zero: 4716129203Scognet break; 4717129203Scognet case float_round_down: 4718129203Scognet return 4719129203Scognet aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) 4720129203Scognet : packFloat128( 0, 0, 0, 0 ); 4721129203Scognet case float_round_up: 4722129203Scognet return 4723129203Scognet aSign ? packFloat128( 1, 0, 0, 0 ) 4724129203Scognet : packFloat128( 0, 0x3FFF, 0, 0 ); 4725129203Scognet } 4726129203Scognet return packFloat128( aSign, 0, 0, 0 ); 4727129203Scognet } 4728129203Scognet lastBitMask = 1; 4729129203Scognet lastBitMask <<= 0x402F - aExp; 4730129203Scognet roundBitsMask = lastBitMask - 1; 4731129203Scognet z.low = 0; 4732129203Scognet z.high = a.high; 4733129203Scognet roundingMode = float_rounding_mode; 4734129203Scognet if ( roundingMode == float_round_nearest_even ) { 4735129203Scognet z.high += lastBitMask>>1; 4736129203Scognet if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 4737129203Scognet z.high &= ~ lastBitMask; 4738129203Scognet } 4739129203Scognet } 4740129203Scognet else if ( roundingMode != float_round_to_zero ) { 4741129203Scognet if ( extractFloat128Sign( z ) 4742129203Scognet ^ ( roundingMode == float_round_up ) ) { 4743129203Scognet z.high |= ( a.low != 0 ); 4744129203Scognet z.high += roundBitsMask; 4745129203Scognet } 4746129203Scognet } 4747129203Scognet z.high &= ~ roundBitsMask; 4748129203Scognet } 4749129203Scognet if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 4750129203Scognet float_exception_flags |= float_flag_inexact; 4751129203Scognet } 4752129203Scognet return z; 4753129203Scognet 4754129203Scognet} 4755129203Scognet 4756129203Scognet/* 4757129203Scognet------------------------------------------------------------------------------- 4758129203ScognetReturns the result of adding the absolute values of the quadruple-precision 4759129203Scognetfloating-point values `a' and `b'. If `zSign' is 1, the sum is negated 4760129203Scognetbefore being returned. `zSign' is ignored if the result is a NaN. 4761129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 4762129203ScognetFloating-Point Arithmetic. 4763129203Scognet------------------------------------------------------------------------------- 4764129203Scognet*/ 4765129203Scognetstatic float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) 4766129203Scognet{ 4767129203Scognet int32 aExp, bExp, zExp; 4768129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 4769129203Scognet int32 expDiff; 4770129203Scognet 4771129203Scognet aSig1 = extractFloat128Frac1( a ); 4772129203Scognet aSig0 = extractFloat128Frac0( a ); 4773129203Scognet aExp = extractFloat128Exp( a ); 4774129203Scognet bSig1 = extractFloat128Frac1( b ); 4775129203Scognet bSig0 = extractFloat128Frac0( b ); 4776129203Scognet bExp = extractFloat128Exp( b ); 4777129203Scognet expDiff = aExp - bExp; 4778129203Scognet if ( 0 < expDiff ) { 4779129203Scognet if ( aExp == 0x7FFF ) { 4780129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4781129203Scognet return a; 4782129203Scognet } 4783129203Scognet if ( bExp == 0 ) { 4784129203Scognet --expDiff; 4785129203Scognet } 4786129203Scognet else { 4787129203Scognet bSig0 |= LIT64( 0x0001000000000000 ); 4788129203Scognet } 4789129203Scognet shift128ExtraRightJamming( 4790129203Scognet bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 4791129203Scognet zExp = aExp; 4792129203Scognet } 4793129203Scognet else if ( expDiff < 0 ) { 4794129203Scognet if ( bExp == 0x7FFF ) { 4795129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4796129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 4797129203Scognet } 4798129203Scognet if ( aExp == 0 ) { 4799129203Scognet ++expDiff; 4800129203Scognet } 4801129203Scognet else { 4802129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4803129203Scognet } 4804129203Scognet shift128ExtraRightJamming( 4805129203Scognet aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 4806129203Scognet zExp = bExp; 4807129203Scognet } 4808129203Scognet else { 4809129203Scognet if ( aExp == 0x7FFF ) { 4810129203Scognet if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4811129203Scognet return propagateFloat128NaN( a, b ); 4812129203Scognet } 4813129203Scognet return a; 4814129203Scognet } 4815129203Scognet add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4816129203Scognet if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); 4817129203Scognet zSig2 = 0; 4818129203Scognet zSig0 |= LIT64( 0x0002000000000000 ); 4819129203Scognet zExp = aExp; 4820129203Scognet goto shiftRight1; 4821129203Scognet } 4822129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4823129203Scognet add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4824129203Scognet --zExp; 4825129203Scognet if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; 4826129203Scognet ++zExp; 4827129203Scognet shiftRight1: 4828129203Scognet shift128ExtraRightJamming( 4829129203Scognet zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 4830129203Scognet roundAndPack: 4831129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 4832129203Scognet 4833129203Scognet} 4834129203Scognet 4835129203Scognet/* 4836129203Scognet------------------------------------------------------------------------------- 4837129203ScognetReturns the result of subtracting the absolute values of the quadruple- 4838129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the 4839129203Scognetdifference is negated before being returned. `zSign' is ignored if the 4840129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 4841129203ScognetStandard for Binary Floating-Point Arithmetic. 4842129203Scognet------------------------------------------------------------------------------- 4843129203Scognet*/ 4844129203Scognetstatic float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) 4845129203Scognet{ 4846129203Scognet int32 aExp, bExp, zExp; 4847129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 4848129203Scognet int32 expDiff; 4849129203Scognet float128 z; 4850129203Scognet 4851129203Scognet aSig1 = extractFloat128Frac1( a ); 4852129203Scognet aSig0 = extractFloat128Frac0( a ); 4853129203Scognet aExp = extractFloat128Exp( a ); 4854129203Scognet bSig1 = extractFloat128Frac1( b ); 4855129203Scognet bSig0 = extractFloat128Frac0( b ); 4856129203Scognet bExp = extractFloat128Exp( b ); 4857129203Scognet expDiff = aExp - bExp; 4858129203Scognet shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4859129203Scognet shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); 4860129203Scognet if ( 0 < expDiff ) goto aExpBigger; 4861129203Scognet if ( expDiff < 0 ) goto bExpBigger; 4862129203Scognet if ( aExp == 0x7FFF ) { 4863129203Scognet if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4864129203Scognet return propagateFloat128NaN( a, b ); 4865129203Scognet } 4866129203Scognet float_raise( float_flag_invalid ); 4867129203Scognet z.low = float128_default_nan_low; 4868129203Scognet z.high = float128_default_nan_high; 4869129203Scognet return z; 4870129203Scognet } 4871129203Scognet if ( aExp == 0 ) { 4872129203Scognet aExp = 1; 4873129203Scognet bExp = 1; 4874129203Scognet } 4875129203Scognet if ( bSig0 < aSig0 ) goto aBigger; 4876129203Scognet if ( aSig0 < bSig0 ) goto bBigger; 4877129203Scognet if ( bSig1 < aSig1 ) goto aBigger; 4878129203Scognet if ( aSig1 < bSig1 ) goto bBigger; 4879129203Scognet return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); 4880129203Scognet bExpBigger: 4881129203Scognet if ( bExp == 0x7FFF ) { 4882129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4883129203Scognet return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); 4884129203Scognet } 4885129203Scognet if ( aExp == 0 ) { 4886129203Scognet ++expDiff; 4887129203Scognet } 4888129203Scognet else { 4889129203Scognet aSig0 |= LIT64( 0x4000000000000000 ); 4890129203Scognet } 4891129203Scognet shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 4892129203Scognet bSig0 |= LIT64( 0x4000000000000000 ); 4893129203Scognet bBigger: 4894129203Scognet sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 4895129203Scognet zExp = bExp; 4896129203Scognet zSign ^= 1; 4897129203Scognet goto normalizeRoundAndPack; 4898129203Scognet aExpBigger: 4899129203Scognet if ( aExp == 0x7FFF ) { 4900129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4901129203Scognet return a; 4902129203Scognet } 4903129203Scognet if ( bExp == 0 ) { 4904129203Scognet --expDiff; 4905129203Scognet } 4906129203Scognet else { 4907129203Scognet bSig0 |= LIT64( 0x4000000000000000 ); 4908129203Scognet } 4909129203Scognet shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 4910129203Scognet aSig0 |= LIT64( 0x4000000000000000 ); 4911129203Scognet aBigger: 4912129203Scognet sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4913129203Scognet zExp = aExp; 4914129203Scognet normalizeRoundAndPack: 4915129203Scognet --zExp; 4916129203Scognet return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); 4917129203Scognet 4918129203Scognet} 4919129203Scognet 4920129203Scognet/* 4921129203Scognet------------------------------------------------------------------------------- 4922129203ScognetReturns the result of adding the quadruple-precision floating-point values 4923129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 4924129203Scognetfor Binary Floating-Point Arithmetic. 4925129203Scognet------------------------------------------------------------------------------- 4926129203Scognet*/ 4927129203Scognetfloat128 float128_add( float128 a, float128 b ) 4928129203Scognet{ 4929129203Scognet flag aSign, bSign; 4930129203Scognet 4931129203Scognet aSign = extractFloat128Sign( a ); 4932129203Scognet bSign = extractFloat128Sign( b ); 4933129203Scognet if ( aSign == bSign ) { 4934129203Scognet return addFloat128Sigs( a, b, aSign ); 4935129203Scognet } 4936129203Scognet else { 4937129203Scognet return subFloat128Sigs( a, b, aSign ); 4938129203Scognet } 4939129203Scognet 4940129203Scognet} 4941129203Scognet 4942129203Scognet/* 4943129203Scognet------------------------------------------------------------------------------- 4944129203ScognetReturns the result of subtracting the quadruple-precision floating-point 4945129203Scognetvalues `a' and `b'. The operation is performed according to the IEC/IEEE 4946129203ScognetStandard for Binary Floating-Point Arithmetic. 4947129203Scognet------------------------------------------------------------------------------- 4948129203Scognet*/ 4949129203Scognetfloat128 float128_sub( float128 a, float128 b ) 4950129203Scognet{ 4951129203Scognet flag aSign, bSign; 4952129203Scognet 4953129203Scognet aSign = extractFloat128Sign( a ); 4954129203Scognet bSign = extractFloat128Sign( b ); 4955129203Scognet if ( aSign == bSign ) { 4956129203Scognet return subFloat128Sigs( a, b, aSign ); 4957129203Scognet } 4958129203Scognet else { 4959129203Scognet return addFloat128Sigs( a, b, aSign ); 4960129203Scognet } 4961129203Scognet 4962129203Scognet} 4963129203Scognet 4964129203Scognet/* 4965129203Scognet------------------------------------------------------------------------------- 4966129203ScognetReturns the result of multiplying the quadruple-precision floating-point 4967129203Scognetvalues `a' and `b'. The operation is performed according to the IEC/IEEE 4968129203ScognetStandard for Binary Floating-Point Arithmetic. 4969129203Scognet------------------------------------------------------------------------------- 4970129203Scognet*/ 4971129203Scognetfloat128 float128_mul( float128 a, float128 b ) 4972129203Scognet{ 4973129203Scognet flag aSign, bSign, zSign; 4974129203Scognet int32 aExp, bExp, zExp; 4975129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 4976129203Scognet float128 z; 4977129203Scognet 4978129203Scognet aSig1 = extractFloat128Frac1( a ); 4979129203Scognet aSig0 = extractFloat128Frac0( a ); 4980129203Scognet aExp = extractFloat128Exp( a ); 4981129203Scognet aSign = extractFloat128Sign( a ); 4982129203Scognet bSig1 = extractFloat128Frac1( b ); 4983129203Scognet bSig0 = extractFloat128Frac0( b ); 4984129203Scognet bExp = extractFloat128Exp( b ); 4985129203Scognet bSign = extractFloat128Sign( b ); 4986129203Scognet zSign = aSign ^ bSign; 4987129203Scognet if ( aExp == 0x7FFF ) { 4988129203Scognet if ( ( aSig0 | aSig1 ) 4989129203Scognet || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { 4990129203Scognet return propagateFloat128NaN( a, b ); 4991129203Scognet } 4992129203Scognet if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 4993129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 4994129203Scognet } 4995129203Scognet if ( bExp == 0x7FFF ) { 4996129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4997129203Scognet if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 4998129203Scognet invalid: 4999129203Scognet float_raise( float_flag_invalid ); 5000129203Scognet z.low = float128_default_nan_low; 5001129203Scognet z.high = float128_default_nan_high; 5002129203Scognet return z; 5003129203Scognet } 5004129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 5005129203Scognet } 5006129203Scognet if ( aExp == 0 ) { 5007129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 5008129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5009129203Scognet } 5010129203Scognet if ( bExp == 0 ) { 5011129203Scognet if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 5012129203Scognet normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 5013129203Scognet } 5014129203Scognet zExp = aExp + bExp - 0x4000; 5015129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 5016129203Scognet shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); 5017129203Scognet mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 5018129203Scognet add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 5019129203Scognet zSig2 |= ( zSig3 != 0 ); 5020129203Scognet if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { 5021129203Scognet shift128ExtraRightJamming( 5022129203Scognet zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 5023129203Scognet ++zExp; 5024129203Scognet } 5025129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 5026129203Scognet 5027129203Scognet} 5028129203Scognet 5029129203Scognet/* 5030129203Scognet------------------------------------------------------------------------------- 5031129203ScognetReturns the result of dividing the quadruple-precision floating-point value 5032129203Scognet`a' by the corresponding value `b'. The operation is performed according to 5033129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5034129203Scognet------------------------------------------------------------------------------- 5035129203Scognet*/ 5036129203Scognetfloat128 float128_div( float128 a, float128 b ) 5037129203Scognet{ 5038129203Scognet flag aSign, bSign, zSign; 5039129203Scognet int32 aExp, bExp, zExp; 5040129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 5041129203Scognet bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 5042129203Scognet float128 z; 5043129203Scognet 5044129203Scognet aSig1 = extractFloat128Frac1( a ); 5045129203Scognet aSig0 = extractFloat128Frac0( a ); 5046129203Scognet aExp = extractFloat128Exp( a ); 5047129203Scognet aSign = extractFloat128Sign( a ); 5048129203Scognet bSig1 = extractFloat128Frac1( b ); 5049129203Scognet bSig0 = extractFloat128Frac0( b ); 5050129203Scognet bExp = extractFloat128Exp( b ); 5051129203Scognet bSign = extractFloat128Sign( b ); 5052129203Scognet zSign = aSign ^ bSign; 5053129203Scognet if ( aExp == 0x7FFF ) { 5054129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 5055129203Scognet if ( bExp == 0x7FFF ) { 5056129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5057129203Scognet goto invalid; 5058129203Scognet } 5059129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 5060129203Scognet } 5061129203Scognet if ( bExp == 0x7FFF ) { 5062129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5063129203Scognet return packFloat128( zSign, 0, 0, 0 ); 5064129203Scognet } 5065129203Scognet if ( bExp == 0 ) { 5066129203Scognet if ( ( bSig0 | bSig1 ) == 0 ) { 5067129203Scognet if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 5068129203Scognet invalid: 5069129203Scognet float_raise( float_flag_invalid ); 5070129203Scognet z.low = float128_default_nan_low; 5071129203Scognet z.high = float128_default_nan_high; 5072129203Scognet return z; 5073129203Scognet } 5074129203Scognet float_raise( float_flag_divbyzero ); 5075129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 5076129203Scognet } 5077129203Scognet normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 5078129203Scognet } 5079129203Scognet if ( aExp == 0 ) { 5080129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 5081129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5082129203Scognet } 5083129203Scognet zExp = aExp - bExp + 0x3FFD; 5084129203Scognet shortShift128Left( 5085129203Scognet aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); 5086129203Scognet shortShift128Left( 5087129203Scognet bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); 5088129203Scognet if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { 5089129203Scognet shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 5090129203Scognet ++zExp; 5091129203Scognet } 5092129203Scognet zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5093129203Scognet mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 5094129203Scognet sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 5095129203Scognet while ( (sbits64) rem0 < 0 ) { 5096129203Scognet --zSig0; 5097129203Scognet add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 5098129203Scognet } 5099129203Scognet zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); 5100129203Scognet if ( ( zSig1 & 0x3FFF ) <= 4 ) { 5101129203Scognet mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 5102129203Scognet sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 5103129203Scognet while ( (sbits64) rem1 < 0 ) { 5104129203Scognet --zSig1; 5105129203Scognet add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 5106129203Scognet } 5107129203Scognet zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 5108129203Scognet } 5109129203Scognet shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); 5110129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 5111129203Scognet 5112129203Scognet} 5113129203Scognet 5114129203Scognet/* 5115129203Scognet------------------------------------------------------------------------------- 5116129203ScognetReturns the remainder of the quadruple-precision floating-point value `a' 5117129203Scognetwith respect to the corresponding value `b'. The operation is performed 5118129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5119129203Scognet------------------------------------------------------------------------------- 5120129203Scognet*/ 5121129203Scognetfloat128 float128_rem( float128 a, float128 b ) 5122129203Scognet{ 5123129203Scognet flag aSign, bSign, zSign; 5124129203Scognet int32 aExp, bExp, expDiff; 5125129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 5126129203Scognet bits64 allZero, alternateASig0, alternateASig1, sigMean1; 5127129203Scognet sbits64 sigMean0; 5128129203Scognet float128 z; 5129129203Scognet 5130129203Scognet aSig1 = extractFloat128Frac1( a ); 5131129203Scognet aSig0 = extractFloat128Frac0( a ); 5132129203Scognet aExp = extractFloat128Exp( a ); 5133129203Scognet aSign = extractFloat128Sign( a ); 5134129203Scognet bSig1 = extractFloat128Frac1( b ); 5135129203Scognet bSig0 = extractFloat128Frac0( b ); 5136129203Scognet bExp = extractFloat128Exp( b ); 5137129203Scognet bSign = extractFloat128Sign( b ); 5138129203Scognet if ( aExp == 0x7FFF ) { 5139129203Scognet if ( ( aSig0 | aSig1 ) 5140129203Scognet || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { 5141129203Scognet return propagateFloat128NaN( a, b ); 5142129203Scognet } 5143129203Scognet goto invalid; 5144129203Scognet } 5145129203Scognet if ( bExp == 0x7FFF ) { 5146129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5147129203Scognet return a; 5148129203Scognet } 5149129203Scognet if ( bExp == 0 ) { 5150129203Scognet if ( ( bSig0 | bSig1 ) == 0 ) { 5151129203Scognet invalid: 5152129203Scognet float_raise( float_flag_invalid ); 5153129203Scognet z.low = float128_default_nan_low; 5154129203Scognet z.high = float128_default_nan_high; 5155129203Scognet return z; 5156129203Scognet } 5157129203Scognet normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 5158129203Scognet } 5159129203Scognet if ( aExp == 0 ) { 5160129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return a; 5161129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5162129203Scognet } 5163129203Scognet expDiff = aExp - bExp; 5164129203Scognet if ( expDiff < -1 ) return a; 5165129203Scognet shortShift128Left( 5166129203Scognet aSig0 | LIT64( 0x0001000000000000 ), 5167129203Scognet aSig1, 5168129203Scognet 15 - ( expDiff < 0 ), 5169129203Scognet &aSig0, 5170129203Scognet &aSig1 5171129203Scognet ); 5172129203Scognet shortShift128Left( 5173129203Scognet bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); 5174129203Scognet q = le128( bSig0, bSig1, aSig0, aSig1 ); 5175129203Scognet if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 5176129203Scognet expDiff -= 64; 5177129203Scognet while ( 0 < expDiff ) { 5178129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5179129203Scognet q = ( 4 < q ) ? q - 4 : 0; 5180129203Scognet mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); 5181129203Scognet shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); 5182129203Scognet shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); 5183129203Scognet sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 5184129203Scognet expDiff -= 61; 5185129203Scognet } 5186129203Scognet if ( -64 < expDiff ) { 5187129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5188129203Scognet q = ( 4 < q ) ? q - 4 : 0; 5189129203Scognet q >>= - expDiff; 5190129203Scognet shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); 5191129203Scognet expDiff += 52; 5192129203Scognet if ( expDiff < 0 ) { 5193129203Scognet shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 5194129203Scognet } 5195129203Scognet else { 5196129203Scognet shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 5197129203Scognet } 5198129203Scognet mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); 5199129203Scognet sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 5200129203Scognet } 5201129203Scognet else { 5202129203Scognet shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); 5203129203Scognet shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); 5204129203Scognet } 5205129203Scognet do { 5206129203Scognet alternateASig0 = aSig0; 5207129203Scognet alternateASig1 = aSig1; 5208129203Scognet ++q; 5209129203Scognet sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 5210129203Scognet } while ( 0 <= (sbits64) aSig0 ); 5211129203Scognet add128( 5212230363Sdas aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 ); 5213129203Scognet if ( ( sigMean0 < 0 ) 5214129203Scognet || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 5215129203Scognet aSig0 = alternateASig0; 5216129203Scognet aSig1 = alternateASig1; 5217129203Scognet } 5218129203Scognet zSign = ( (sbits64) aSig0 < 0 ); 5219129203Scognet if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 5220129203Scognet return 5221129203Scognet normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 5222129203Scognet 5223129203Scognet} 5224129203Scognet 5225129203Scognet/* 5226129203Scognet------------------------------------------------------------------------------- 5227129203ScognetReturns the square root of the quadruple-precision floating-point value `a'. 5228129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary 5229129203ScognetFloating-Point Arithmetic. 5230129203Scognet------------------------------------------------------------------------------- 5231129203Scognet*/ 5232129203Scognetfloat128 float128_sqrt( float128 a ) 5233129203Scognet{ 5234129203Scognet flag aSign; 5235129203Scognet int32 aExp, zExp; 5236129203Scognet bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 5237129203Scognet bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 5238129203Scognet float128 z; 5239129203Scognet 5240129203Scognet aSig1 = extractFloat128Frac1( a ); 5241129203Scognet aSig0 = extractFloat128Frac0( a ); 5242129203Scognet aExp = extractFloat128Exp( a ); 5243129203Scognet aSign = extractFloat128Sign( a ); 5244129203Scognet if ( aExp == 0x7FFF ) { 5245129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); 5246129203Scognet if ( ! aSign ) return a; 5247129203Scognet goto invalid; 5248129203Scognet } 5249129203Scognet if ( aSign ) { 5250129203Scognet if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 5251129203Scognet invalid: 5252129203Scognet float_raise( float_flag_invalid ); 5253129203Scognet z.low = float128_default_nan_low; 5254129203Scognet z.high = float128_default_nan_high; 5255129203Scognet return z; 5256129203Scognet } 5257129203Scognet if ( aExp == 0 ) { 5258129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); 5259129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5260129203Scognet } 5261129203Scognet zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; 5262129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 5263129203Scognet zSig0 = estimateSqrt32( aExp, aSig0>>17 ); 5264129203Scognet shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); 5265129203Scognet zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 5266129203Scognet doubleZSig0 = zSig0<<1; 5267129203Scognet mul64To128( zSig0, zSig0, &term0, &term1 ); 5268129203Scognet sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 5269129203Scognet while ( (sbits64) rem0 < 0 ) { 5270129203Scognet --zSig0; 5271129203Scognet doubleZSig0 -= 2; 5272129203Scognet add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 5273129203Scognet } 5274129203Scognet zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 5275129203Scognet if ( ( zSig1 & 0x1FFF ) <= 5 ) { 5276129203Scognet if ( zSig1 == 0 ) zSig1 = 1; 5277129203Scognet mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 5278129203Scognet sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 5279129203Scognet mul64To128( zSig1, zSig1, &term2, &term3 ); 5280129203Scognet sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 5281129203Scognet while ( (sbits64) rem1 < 0 ) { 5282129203Scognet --zSig1; 5283129203Scognet shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 5284129203Scognet term3 |= 1; 5285129203Scognet term2 |= doubleZSig0; 5286129203Scognet add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 5287129203Scognet } 5288129203Scognet zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 5289129203Scognet } 5290129203Scognet shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); 5291129203Scognet return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); 5292129203Scognet 5293129203Scognet} 5294129203Scognet 5295129203Scognet/* 5296129203Scognet------------------------------------------------------------------------------- 5297129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to 5298129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 5299129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5300129203Scognet------------------------------------------------------------------------------- 5301129203Scognet*/ 5302129203Scognetflag float128_eq( float128 a, float128 b ) 5303129203Scognet{ 5304129203Scognet 5305129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5306129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5307129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5308129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5309129203Scognet ) { 5310129203Scognet if ( float128_is_signaling_nan( a ) 5311129203Scognet || float128_is_signaling_nan( b ) ) { 5312129203Scognet float_raise( float_flag_invalid ); 5313129203Scognet } 5314129203Scognet return 0; 5315129203Scognet } 5316129203Scognet return 5317129203Scognet ( a.low == b.low ) 5318129203Scognet && ( ( a.high == b.high ) 5319129203Scognet || ( ( a.low == 0 ) 5320129203Scognet && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) 5321129203Scognet ); 5322129203Scognet 5323129203Scognet} 5324129203Scognet 5325129203Scognet/* 5326129203Scognet------------------------------------------------------------------------------- 5327129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5328129203Scognetor equal to the corresponding value `b', and 0 otherwise. The comparison 5329129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 5330129203ScognetArithmetic. 5331129203Scognet------------------------------------------------------------------------------- 5332129203Scognet*/ 5333129203Scognetflag float128_le( float128 a, float128 b ) 5334129203Scognet{ 5335129203Scognet flag aSign, bSign; 5336129203Scognet 5337129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5338129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5339129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5340129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5341129203Scognet ) { 5342129203Scognet float_raise( float_flag_invalid ); 5343129203Scognet return 0; 5344129203Scognet } 5345129203Scognet aSign = extractFloat128Sign( a ); 5346129203Scognet bSign = extractFloat128Sign( b ); 5347129203Scognet if ( aSign != bSign ) { 5348129203Scognet return 5349129203Scognet aSign 5350129203Scognet || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5351129203Scognet == 0 ); 5352129203Scognet } 5353129203Scognet return 5354129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 5355129203Scognet : le128( a.high, a.low, b.high, b.low ); 5356129203Scognet 5357129203Scognet} 5358129203Scognet 5359129203Scognet/* 5360129203Scognet------------------------------------------------------------------------------- 5361129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5362129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 5363129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5364129203Scognet------------------------------------------------------------------------------- 5365129203Scognet*/ 5366129203Scognetflag float128_lt( float128 a, float128 b ) 5367129203Scognet{ 5368129203Scognet flag aSign, bSign; 5369129203Scognet 5370129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5371129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5372129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5373129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5374129203Scognet ) { 5375129203Scognet float_raise( float_flag_invalid ); 5376129203Scognet return 0; 5377129203Scognet } 5378129203Scognet aSign = extractFloat128Sign( a ); 5379129203Scognet bSign = extractFloat128Sign( b ); 5380129203Scognet if ( aSign != bSign ) { 5381129203Scognet return 5382129203Scognet aSign 5383129203Scognet && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5384129203Scognet != 0 ); 5385129203Scognet } 5386129203Scognet return 5387129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 5388129203Scognet : lt128( a.high, a.low, b.high, b.low ); 5389129203Scognet 5390129203Scognet} 5391129203Scognet 5392129203Scognet/* 5393129203Scognet------------------------------------------------------------------------------- 5394129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to 5395129203Scognetthe corresponding value `b', and 0 otherwise. The invalid exception is 5396129203Scognetraised if either operand is a NaN. Otherwise, the comparison is performed 5397129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5398129203Scognet------------------------------------------------------------------------------- 5399129203Scognet*/ 5400129203Scognetflag float128_eq_signaling( float128 a, float128 b ) 5401129203Scognet{ 5402129203Scognet 5403129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5404129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5405129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5406129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5407129203Scognet ) { 5408129203Scognet float_raise( float_flag_invalid ); 5409129203Scognet return 0; 5410129203Scognet } 5411129203Scognet return 5412129203Scognet ( a.low == b.low ) 5413129203Scognet && ( ( a.high == b.high ) 5414129203Scognet || ( ( a.low == 0 ) 5415129203Scognet && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) 5416129203Scognet ); 5417129203Scognet 5418129203Scognet} 5419129203Scognet 5420129203Scognet/* 5421129203Scognet------------------------------------------------------------------------------- 5422129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5423129203Scognetor equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 5424129203Scognetcause an exception. Otherwise, the comparison is performed according to the 5425129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 5426129203Scognet------------------------------------------------------------------------------- 5427129203Scognet*/ 5428129203Scognetflag float128_le_quiet( float128 a, float128 b ) 5429129203Scognet{ 5430129203Scognet flag aSign, bSign; 5431129203Scognet 5432129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5433129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5434129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5435129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5436129203Scognet ) { 5437129203Scognet if ( float128_is_signaling_nan( a ) 5438129203Scognet || float128_is_signaling_nan( b ) ) { 5439129203Scognet float_raise( float_flag_invalid ); 5440129203Scognet } 5441129203Scognet return 0; 5442129203Scognet } 5443129203Scognet aSign = extractFloat128Sign( a ); 5444129203Scognet bSign = extractFloat128Sign( b ); 5445129203Scognet if ( aSign != bSign ) { 5446129203Scognet return 5447129203Scognet aSign 5448129203Scognet || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5449129203Scognet == 0 ); 5450129203Scognet } 5451129203Scognet return 5452129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 5453129203Scognet : le128( a.high, a.low, b.high, b.low ); 5454129203Scognet 5455129203Scognet} 5456129203Scognet 5457129203Scognet/* 5458129203Scognet------------------------------------------------------------------------------- 5459129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5460129203Scognetthe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 5461129203Scognetexception. Otherwise, the comparison is performed according to the IEC/IEEE 5462129203ScognetStandard for Binary Floating-Point Arithmetic. 5463129203Scognet------------------------------------------------------------------------------- 5464129203Scognet*/ 5465129203Scognetflag float128_lt_quiet( float128 a, float128 b ) 5466129203Scognet{ 5467129203Scognet flag aSign, bSign; 5468129203Scognet 5469129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5470129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5471129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5472129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5473129203Scognet ) { 5474129203Scognet if ( float128_is_signaling_nan( a ) 5475129203Scognet || float128_is_signaling_nan( b ) ) { 5476129203Scognet float_raise( float_flag_invalid ); 5477129203Scognet } 5478129203Scognet return 0; 5479129203Scognet } 5480129203Scognet aSign = extractFloat128Sign( a ); 5481129203Scognet bSign = extractFloat128Sign( b ); 5482129203Scognet if ( aSign != bSign ) { 5483129203Scognet return 5484129203Scognet aSign 5485129203Scognet && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5486129203Scognet != 0 ); 5487129203Scognet } 5488129203Scognet return 5489129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 5490129203Scognet : lt128( a.high, a.low, b.high, b.low ); 5491129203Scognet 5492129203Scognet} 5493129203Scognet 5494129203Scognet#endif 5495129203Scognet 5496129203Scognet 5497129203Scognet#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) 5498129203Scognet 5499129203Scognet/* 5500129203Scognet * These two routines are not part of the original softfloat distribution. 5501129203Scognet * 5502129203Scognet * They are based on the corresponding conversions to integer but return 5503129203Scognet * unsigned numbers instead since these functions are required by GCC. 5504129203Scognet * 5505129203Scognet * Added by Mark Brinicombe <mark@NetBSD.org> 27/09/97 5506129203Scognet * 5507129203Scognet * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15] 5508129203Scognet */ 5509129203Scognet 5510129203Scognet/* 5511129203Scognet------------------------------------------------------------------------------- 5512129203ScognetReturns the result of converting the double-precision floating-point value 5513129203Scognet`a' to the 32-bit unsigned integer format. The conversion is 5514129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point 5515129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 5516129203Scognet`a' is a NaN, the largest positive integer is returned. If the conversion 5517129203Scognetoverflows, the largest integer positive is returned. 5518129203Scognet------------------------------------------------------------------------------- 5519129203Scognet*/ 5520129203Scognetuint32 float64_to_uint32_round_to_zero( float64 a ) 5521129203Scognet{ 5522129203Scognet flag aSign; 5523129203Scognet int16 aExp, shiftCount; 5524129203Scognet bits64 aSig, savedASig; 5525129203Scognet uint32 z; 5526129203Scognet 5527129203Scognet aSig = extractFloat64Frac( a ); 5528129203Scognet aExp = extractFloat64Exp( a ); 5529129203Scognet aSign = extractFloat64Sign( a ); 5530129203Scognet 5531129203Scognet if (aSign) { 5532129203Scognet float_raise( float_flag_invalid ); 5533129203Scognet return(0); 5534129203Scognet } 5535129203Scognet 5536129203Scognet if ( 0x41E < aExp ) { 5537129203Scognet float_raise( float_flag_invalid ); 5538129203Scognet return 0xffffffff; 5539129203Scognet } 5540129203Scognet else if ( aExp < 0x3FF ) { 5541129203Scognet if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 5542129203Scognet return 0; 5543129203Scognet } 5544129203Scognet aSig |= LIT64( 0x0010000000000000 ); 5545129203Scognet shiftCount = 0x433 - aExp; 5546129203Scognet savedASig = aSig; 5547129203Scognet aSig >>= shiftCount; 5548129203Scognet z = aSig; 5549129203Scognet if ( ( aSig<<shiftCount ) != savedASig ) { 5550129203Scognet float_exception_flags |= float_flag_inexact; 5551129203Scognet } 5552129203Scognet return z; 5553129203Scognet 5554129203Scognet} 5555129203Scognet 5556129203Scognet/* 5557129203Scognet------------------------------------------------------------------------------- 5558129203ScognetReturns the result of converting the single-precision floating-point value 5559129203Scognet`a' to the 32-bit unsigned integer format. The conversion is 5560129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point 5561129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 5562129203Scognet`a' is a NaN, the largest positive integer is returned. If the conversion 5563129203Scognetoverflows, the largest positive integer is returned. 5564129203Scognet------------------------------------------------------------------------------- 5565129203Scognet*/ 5566129203Scognetuint32 float32_to_uint32_round_to_zero( float32 a ) 5567129203Scognet{ 5568129203Scognet flag aSign; 5569129203Scognet int16 aExp, shiftCount; 5570129203Scognet bits32 aSig; 5571129203Scognet uint32 z; 5572129203Scognet 5573129203Scognet aSig = extractFloat32Frac( a ); 5574129203Scognet aExp = extractFloat32Exp( a ); 5575129203Scognet aSign = extractFloat32Sign( a ); 5576129203Scognet shiftCount = aExp - 0x9E; 5577129203Scognet 5578129203Scognet if (aSign) { 5579129203Scognet float_raise( float_flag_invalid ); 5580129203Scognet return(0); 5581129203Scognet } 5582129203Scognet if ( 0 < shiftCount ) { 5583129203Scognet float_raise( float_flag_invalid ); 5584129203Scognet return 0xFFFFFFFF; 5585129203Scognet } 5586129203Scognet else if ( aExp <= 0x7E ) { 5587129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 5588129203Scognet return 0; 5589129203Scognet } 5590129203Scognet aSig = ( aSig | 0x800000 )<<8; 5591129203Scognet z = aSig>>( - shiftCount ); 5592129203Scognet if ( aSig<<( shiftCount & 31 ) ) { 5593129203Scognet float_exception_flags |= float_flag_inexact; 5594129203Scognet } 5595129203Scognet return z; 5596129203Scognet 5597129203Scognet} 5598129203Scognet 5599129203Scognet#endif 5600