1129203Scognet/* $NetBSD: softfloat.c,v 1.2 2003/07/26 19:24:52 salo Exp $ */ 2129203Scognet 3129203Scognet/* 4129203Scognet * This version hacked for use with gcc -msoft-float by bjh21. 5129203Scognet * (Mostly a case of #ifdefing out things GCC doesn't need or provides 6129203Scognet * itself). 7129203Scognet */ 8129203Scognet 9129203Scognet/* 10129203Scognet * Things you may want to define: 11129203Scognet * 12129203Scognet * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 13129203Scognet * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 14129203Scognet * properly renamed. 15129203Scognet */ 16129203Scognet 17129203Scognet/* 18129203Scognet=============================================================================== 19129203Scognet 20129203ScognetThis C source file is part of the SoftFloat IEC/IEEE Floating-point 21129203ScognetArithmetic Package, Release 2a. 22129203Scognet 23129203ScognetWritten by John R. Hauser. This work was made possible in part by the 24129203ScognetInternational Computer Science Institute, located at Suite 600, 1947 Center 25129203ScognetStreet, Berkeley, California 94704. Funding was partially provided by the 26129203ScognetNational Science Foundation under grant MIP-9311980. The original version 27129203Scognetof this code was written as part of a project to build a fixed-point vector 28129203Scognetprocessor in collaboration with the University of California at Berkeley, 29129203Scognetoverseen by Profs. Nelson Morgan and John Wawrzynek. More information 30129203Scognetis available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 31129203Scognetarithmetic/SoftFloat.html'. 32129203Scognet 33129203ScognetTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 34129203Scognethas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 35129203ScognetTIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 36129203ScognetPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 37129203ScognetAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 38129203Scognet 39129203ScognetDerivative works are acceptable, even for commercial purposes, so long as 40129203Scognet(1) they include prominent notice that the work is derivative, and (2) they 41129203Scognetinclude prominent notice akin to these four paragraphs for those parts of 42129203Scognetthis code that are retained. 43129203Scognet 44129203Scognet=============================================================================== 45129203Scognet*/ 46129203Scognet 47129203Scognet#include <sys/cdefs.h> 48129203Scognet__FBSDID("$FreeBSD$"); 49129203Scognet 50129203Scognet#ifdef SOFTFLOAT_FOR_GCC 51129203Scognet#include "softfloat-for-gcc.h" 52129203Scognet#endif 53129203Scognet 54129203Scognet#include "milieu.h" 55129203Scognet#include "softfloat.h" 56129203Scognet 57129203Scognet/* 58129203Scognet * Conversions between floats as stored in memory and floats as 59129203Scognet * SoftFloat uses them 60129203Scognet */ 61129203Scognet#ifndef FLOAT64_DEMANGLE 62129203Scognet#define FLOAT64_DEMANGLE(a) (a) 63129203Scognet#endif 64129203Scognet#ifndef FLOAT64_MANGLE 65129203Scognet#define FLOAT64_MANGLE(a) (a) 66129203Scognet#endif 67129203Scognet 68129203Scognet/* 69129203Scognet------------------------------------------------------------------------------- 70129203ScognetFloating-point rounding mode, extended double-precision rounding precision, 71129203Scognetand exception flags. 72129203Scognet------------------------------------------------------------------------------- 73129203Scognet*/ 74129203Scognetfp_rnd_t float_rounding_mode = float_round_nearest_even; 75129203Scognetfp_except float_exception_flags = 0; 76129203Scognet#ifdef FLOATX80 77129203Scognetint8 floatx80_rounding_precision = 80; 78129203Scognet#endif 79129203Scognet 80129203Scognet/* 81129203Scognet------------------------------------------------------------------------------- 82129203ScognetPrimitive arithmetic functions, including multi-word arithmetic, and 83129203Scognetdivision and square root approximations. (Can be specialized to target if 84129203Scognetdesired.) 85129203Scognet------------------------------------------------------------------------------- 86129203Scognet*/ 87129203Scognet#include "softfloat-macros" 88129203Scognet 89129203Scognet/* 90129203Scognet------------------------------------------------------------------------------- 91129203ScognetFunctions and definitions to determine: (1) whether tininess for underflow 92129203Scognetis detected before or after rounding by default, (2) what (if anything) 93129203Scognethappens when exceptions are raised, (3) how signaling NaNs are distinguished 94129203Scognetfrom quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 95129203Scognetare propagated from function inputs to output. These details are target- 96129203Scognetspecific. 97129203Scognet------------------------------------------------------------------------------- 98129203Scognet*/ 99129203Scognet#include "softfloat-specialize" 100129203Scognet 101129203Scognet#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) 102129203Scognet/* 103129203Scognet------------------------------------------------------------------------------- 104129203ScognetTakes a 64-bit fixed-point value `absZ' with binary point between bits 6 105129203Scognetand 7, and returns the properly rounded 32-bit integer corresponding to the 106129203Scognetinput. If `zSign' is 1, the input is negated before being converted to an 107129203Scognetinteger. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input 108129203Scognetis simply rounded to an integer, with the inexact exception raised if the 109129203Scognetinput cannot be represented exactly as an integer. However, if the fixed- 110129203Scognetpoint input is too large, the invalid exception is raised and the largest 111129203Scognetpositive or negative integer is returned. 112129203Scognet------------------------------------------------------------------------------- 113129203Scognet*/ 114129203Scognetstatic int32 roundAndPackInt32( flag zSign, bits64 absZ ) 115129203Scognet{ 116129203Scognet int8 roundingMode; 117129203Scognet flag roundNearestEven; 118129203Scognet int8 roundIncrement, roundBits; 119129203Scognet int32 z; 120129203Scognet 121129203Scognet roundingMode = float_rounding_mode; 122129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 123129203Scognet roundIncrement = 0x40; 124129203Scognet if ( ! roundNearestEven ) { 125129203Scognet if ( roundingMode == float_round_to_zero ) { 126129203Scognet roundIncrement = 0; 127129203Scognet } 128129203Scognet else { 129129203Scognet roundIncrement = 0x7F; 130129203Scognet if ( zSign ) { 131129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 132129203Scognet } 133129203Scognet else { 134129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 135129203Scognet } 136129203Scognet } 137129203Scognet } 138129203Scognet roundBits = absZ & 0x7F; 139129203Scognet absZ = ( absZ + roundIncrement )>>7; 140129203Scognet absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 141129203Scognet z = absZ; 142129203Scognet if ( zSign ) z = - z; 143129203Scognet if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 144129203Scognet float_raise( float_flag_invalid ); 145129203Scognet return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 146129203Scognet } 147129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 148129203Scognet return z; 149129203Scognet 150129203Scognet} 151129203Scognet 152129203Scognet/* 153129203Scognet------------------------------------------------------------------------------- 154129203ScognetTakes the 128-bit fixed-point value formed by concatenating `absZ0' and 155129203Scognet`absZ1', with binary point between bits 63 and 64 (between the input words), 156129203Scognetand returns the properly rounded 64-bit integer corresponding to the input. 157129203ScognetIf `zSign' is 1, the input is negated before being converted to an integer. 158129203ScognetOrdinarily, the fixed-point input is simply rounded to an integer, with 159129203Scognetthe inexact exception raised if the input cannot be represented exactly as 160129203Scognetan integer. However, if the fixed-point input is too large, the invalid 161129203Scognetexception is raised and the largest positive or negative integer is 162129203Scognetreturned. 163129203Scognet------------------------------------------------------------------------------- 164129203Scognet*/ 165129203Scognetstatic int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) 166129203Scognet{ 167129203Scognet int8 roundingMode; 168129203Scognet flag roundNearestEven, increment; 169129203Scognet int64 z; 170129203Scognet 171129203Scognet roundingMode = float_rounding_mode; 172129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 173129203Scognet increment = ( (sbits64) absZ1 < 0 ); 174129203Scognet if ( ! roundNearestEven ) { 175129203Scognet if ( roundingMode == float_round_to_zero ) { 176129203Scognet increment = 0; 177129203Scognet } 178129203Scognet else { 179129203Scognet if ( zSign ) { 180129203Scognet increment = ( roundingMode == float_round_down ) && absZ1; 181129203Scognet } 182129203Scognet else { 183129203Scognet increment = ( roundingMode == float_round_up ) && absZ1; 184129203Scognet } 185129203Scognet } 186129203Scognet } 187129203Scognet if ( increment ) { 188129203Scognet ++absZ0; 189129203Scognet if ( absZ0 == 0 ) goto overflow; 190129203Scognet absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); 191129203Scognet } 192129203Scognet z = absZ0; 193129203Scognet if ( zSign ) z = - z; 194129203Scognet if ( z && ( ( z < 0 ) ^ zSign ) ) { 195129203Scognet overflow: 196129203Scognet float_raise( float_flag_invalid ); 197129203Scognet return 198129203Scognet zSign ? (sbits64) LIT64( 0x8000000000000000 ) 199129203Scognet : LIT64( 0x7FFFFFFFFFFFFFFF ); 200129203Scognet } 201129203Scognet if ( absZ1 ) float_exception_flags |= float_flag_inexact; 202129203Scognet return z; 203129203Scognet 204129203Scognet} 205129203Scognet#endif 206129203Scognet 207129203Scognet/* 208129203Scognet------------------------------------------------------------------------------- 209129203ScognetReturns the fraction bits of the single-precision floating-point value `a'. 210129203Scognet------------------------------------------------------------------------------- 211129203Scognet*/ 212129203ScognetINLINE bits32 extractFloat32Frac( float32 a ) 213129203Scognet{ 214129203Scognet 215129203Scognet return a & 0x007FFFFF; 216129203Scognet 217129203Scognet} 218129203Scognet 219129203Scognet/* 220129203Scognet------------------------------------------------------------------------------- 221129203ScognetReturns the exponent bits of the single-precision floating-point value `a'. 222129203Scognet------------------------------------------------------------------------------- 223129203Scognet*/ 224129203ScognetINLINE int16 extractFloat32Exp( float32 a ) 225129203Scognet{ 226129203Scognet 227129203Scognet return ( a>>23 ) & 0xFF; 228129203Scognet 229129203Scognet} 230129203Scognet 231129203Scognet/* 232129203Scognet------------------------------------------------------------------------------- 233129203ScognetReturns the sign bit of the single-precision floating-point value `a'. 234129203Scognet------------------------------------------------------------------------------- 235129203Scognet*/ 236129203ScognetINLINE flag extractFloat32Sign( float32 a ) 237129203Scognet{ 238129203Scognet 239129203Scognet return a>>31; 240129203Scognet 241129203Scognet} 242129203Scognet 243129203Scognet/* 244129203Scognet------------------------------------------------------------------------------- 245129203ScognetNormalizes the subnormal single-precision floating-point value represented 246129203Scognetby the denormalized significand `aSig'. The normalized exponent and 247129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and 248129203Scognet`zSigPtr', respectively. 249129203Scognet------------------------------------------------------------------------------- 250129203Scognet*/ 251129203Scognetstatic void 252129203Scognet normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 253129203Scognet{ 254129203Scognet int8 shiftCount; 255129203Scognet 256129203Scognet shiftCount = countLeadingZeros32( aSig ) - 8; 257129203Scognet *zSigPtr = aSig<<shiftCount; 258129203Scognet *zExpPtr = 1 - shiftCount; 259129203Scognet 260129203Scognet} 261129203Scognet 262129203Scognet/* 263129203Scognet------------------------------------------------------------------------------- 264129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a 265129203Scognetsingle-precision floating-point value, returning the result. After being 266129203Scognetshifted into the proper positions, the three fields are simply added 267129203Scognettogether to form the result. This means that any integer portion of `zSig' 268129203Scognetwill be added into the exponent. Since a properly normalized significand 269129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less 270129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized 271129203Scognetsignificand. 272129203Scognet------------------------------------------------------------------------------- 273129203Scognet*/ 274129203ScognetINLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 275129203Scognet{ 276129203Scognet 277129203Scognet return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 278129203Scognet 279129203Scognet} 280129203Scognet 281129203Scognet/* 282129203Scognet------------------------------------------------------------------------------- 283129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 284129203Scognetand significand `zSig', and returns the proper single-precision floating- 285129203Scognetpoint value corresponding to the abstract input. Ordinarily, the abstract 286129203Scognetvalue is simply rounded and packed into the single-precision format, with 287129203Scognetthe inexact exception raised if the abstract input cannot be represented 288129203Scognetexactly. However, if the abstract value is too large, the overflow and 289129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 290129203Scognetreturned. If the abstract value is too small, the input value is rounded to 291129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 292129203Scognetthe abstract input cannot be represented exactly as a subnormal single- 293129203Scognetprecision floating-point number. 294129203Scognet The input significand `zSig' has its binary point between bits 30 295129203Scognetand 29, which is 7 bits to the left of the usual location. This shifted 296129203Scognetsignificand must be normalized or smaller. If `zSig' is not normalized, 297129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number, 298129203Scognetand it must not require rounding. In the usual case that `zSig' is 299129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 300129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for 301129203ScognetBinary Floating-Point Arithmetic. 302129203Scognet------------------------------------------------------------------------------- 303129203Scognet*/ 304129203Scognetstatic float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 305129203Scognet{ 306129203Scognet int8 roundingMode; 307129203Scognet flag roundNearestEven; 308129203Scognet int8 roundIncrement, roundBits; 309129203Scognet flag isTiny; 310129203Scognet 311129203Scognet roundingMode = float_rounding_mode; 312129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 313129203Scognet roundIncrement = 0x40; 314129203Scognet if ( ! roundNearestEven ) { 315129203Scognet if ( roundingMode == float_round_to_zero ) { 316129203Scognet roundIncrement = 0; 317129203Scognet } 318129203Scognet else { 319129203Scognet roundIncrement = 0x7F; 320129203Scognet if ( zSign ) { 321129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 322129203Scognet } 323129203Scognet else { 324129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 325129203Scognet } 326129203Scognet } 327129203Scognet } 328129203Scognet roundBits = zSig & 0x7F; 329129203Scognet if ( 0xFD <= (bits16) zExp ) { 330129203Scognet if ( ( 0xFD < zExp ) 331129203Scognet || ( ( zExp == 0xFD ) 332129203Scognet && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 333129203Scognet ) { 334129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 335129203Scognet return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 336129203Scognet } 337129203Scognet if ( zExp < 0 ) { 338129203Scognet isTiny = 339129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 340129203Scognet || ( zExp < -1 ) 341129203Scognet || ( zSig + roundIncrement < 0x80000000 ); 342129203Scognet shift32RightJamming( zSig, - zExp, &zSig ); 343129203Scognet zExp = 0; 344129203Scognet roundBits = zSig & 0x7F; 345129203Scognet if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 346129203Scognet } 347129203Scognet } 348129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 349129203Scognet zSig = ( zSig + roundIncrement )>>7; 350129203Scognet zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 351129203Scognet if ( zSig == 0 ) zExp = 0; 352129203Scognet return packFloat32( zSign, zExp, zSig ); 353129203Scognet 354129203Scognet} 355129203Scognet 356129203Scognet/* 357129203Scognet------------------------------------------------------------------------------- 358129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 359129203Scognetand significand `zSig', and returns the proper single-precision floating- 360129203Scognetpoint value corresponding to the abstract input. This routine is just like 361129203Scognet`roundAndPackFloat32' except that `zSig' does not have to be normalized. 362129203ScognetBit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 363129203Scognetfloating-point exponent. 364129203Scognet------------------------------------------------------------------------------- 365129203Scognet*/ 366129203Scognetstatic float32 367129203Scognet normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 368129203Scognet{ 369129203Scognet int8 shiftCount; 370129203Scognet 371129203Scognet shiftCount = countLeadingZeros32( zSig ) - 1; 372129203Scognet return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 373129203Scognet 374129203Scognet} 375129203Scognet 376129203Scognet/* 377129203Scognet------------------------------------------------------------------------------- 378129203ScognetReturns the fraction bits of the double-precision floating-point value `a'. 379129203Scognet------------------------------------------------------------------------------- 380129203Scognet*/ 381129203ScognetINLINE bits64 extractFloat64Frac( float64 a ) 382129203Scognet{ 383129203Scognet 384129203Scognet return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF ); 385129203Scognet 386129203Scognet} 387129203Scognet 388129203Scognet/* 389129203Scognet------------------------------------------------------------------------------- 390129203ScognetReturns the exponent bits of the double-precision floating-point value `a'. 391129203Scognet------------------------------------------------------------------------------- 392129203Scognet*/ 393129203ScognetINLINE int16 extractFloat64Exp( float64 a ) 394129203Scognet{ 395129203Scognet 396129203Scognet return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF; 397129203Scognet 398129203Scognet} 399129203Scognet 400129203Scognet/* 401129203Scognet------------------------------------------------------------------------------- 402129203ScognetReturns the sign bit of the double-precision floating-point value `a'. 403129203Scognet------------------------------------------------------------------------------- 404129203Scognet*/ 405129203ScognetINLINE flag extractFloat64Sign( float64 a ) 406129203Scognet{ 407129203Scognet 408129203Scognet return FLOAT64_DEMANGLE(a)>>63; 409129203Scognet 410129203Scognet} 411129203Scognet 412129203Scognet/* 413129203Scognet------------------------------------------------------------------------------- 414129203ScognetNormalizes the subnormal double-precision floating-point value represented 415129203Scognetby the denormalized significand `aSig'. The normalized exponent and 416129203Scognetsignificand are stored at the locations pointed to by `zExpPtr' and 417129203Scognet`zSigPtr', respectively. 418129203Scognet------------------------------------------------------------------------------- 419129203Scognet*/ 420129203Scognetstatic void 421129203Scognet normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) 422129203Scognet{ 423129203Scognet int8 shiftCount; 424129203Scognet 425129203Scognet shiftCount = countLeadingZeros64( aSig ) - 11; 426129203Scognet *zSigPtr = aSig<<shiftCount; 427129203Scognet *zExpPtr = 1 - shiftCount; 428129203Scognet 429129203Scognet} 430129203Scognet 431129203Scognet/* 432129203Scognet------------------------------------------------------------------------------- 433129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into a 434129203Scognetdouble-precision floating-point value, returning the result. After being 435129203Scognetshifted into the proper positions, the three fields are simply added 436129203Scognettogether to form the result. This means that any integer portion of `zSig' 437129203Scognetwill be added into the exponent. Since a properly normalized significand 438129203Scognetwill have an integer portion equal to 1, the `zExp' input should be 1 less 439129203Scognetthan the desired result exponent whenever `zSig' is a complete, normalized 440129203Scognetsignificand. 441129203Scognet------------------------------------------------------------------------------- 442129203Scognet*/ 443129203ScognetINLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) 444129203Scognet{ 445129203Scognet 446129203Scognet return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 447129203Scognet ( ( (bits64) zExp )<<52 ) + zSig ); 448129203Scognet 449129203Scognet} 450129203Scognet 451129203Scognet/* 452129203Scognet------------------------------------------------------------------------------- 453129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 454129203Scognetand significand `zSig', and returns the proper double-precision floating- 455129203Scognetpoint value corresponding to the abstract input. Ordinarily, the abstract 456129203Scognetvalue is simply rounded and packed into the double-precision format, with 457129203Scognetthe inexact exception raised if the abstract input cannot be represented 458129203Scognetexactly. However, if the abstract value is too large, the overflow and 459129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 460129203Scognetreturned. If the abstract value is too small, the input value is rounded to 461129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 462129203Scognetthe abstract input cannot be represented exactly as a subnormal double- 463129203Scognetprecision floating-point number. 464129203Scognet The input significand `zSig' has its binary point between bits 62 465129203Scognetand 61, which is 10 bits to the left of the usual location. This shifted 466129203Scognetsignificand must be normalized or smaller. If `zSig' is not normalized, 467129203Scognet`zExp' must be 0; in that case, the result returned is a subnormal number, 468129203Scognetand it must not require rounding. In the usual case that `zSig' is 469129203Scognetnormalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 470129203ScognetThe handling of underflow and overflow follows the IEC/IEEE Standard for 471129203ScognetBinary Floating-Point Arithmetic. 472129203Scognet------------------------------------------------------------------------------- 473129203Scognet*/ 474129203Scognetstatic float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 475129203Scognet{ 476129203Scognet int8 roundingMode; 477129203Scognet flag roundNearestEven; 478129203Scognet int16 roundIncrement, roundBits; 479129203Scognet flag isTiny; 480129203Scognet 481129203Scognet roundingMode = float_rounding_mode; 482129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 483129203Scognet roundIncrement = 0x200; 484129203Scognet if ( ! roundNearestEven ) { 485129203Scognet if ( roundingMode == float_round_to_zero ) { 486129203Scognet roundIncrement = 0; 487129203Scognet } 488129203Scognet else { 489129203Scognet roundIncrement = 0x3FF; 490129203Scognet if ( zSign ) { 491129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 492129203Scognet } 493129203Scognet else { 494129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 495129203Scognet } 496129203Scognet } 497129203Scognet } 498129203Scognet roundBits = zSig & 0x3FF; 499129203Scognet if ( 0x7FD <= (bits16) zExp ) { 500129203Scognet if ( ( 0x7FD < zExp ) 501129203Scognet || ( ( zExp == 0x7FD ) 502129203Scognet && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) 503129203Scognet ) { 504129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 505129203Scognet return FLOAT64_MANGLE( 506129203Scognet FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) - 507129203Scognet ( roundIncrement == 0 )); 508129203Scognet } 509129203Scognet if ( zExp < 0 ) { 510129203Scognet isTiny = 511129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 512129203Scognet || ( zExp < -1 ) 513129203Scognet || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); 514129203Scognet shift64RightJamming( zSig, - zExp, &zSig ); 515129203Scognet zExp = 0; 516129203Scognet roundBits = zSig & 0x3FF; 517129203Scognet if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 518129203Scognet } 519129203Scognet } 520129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 521129203Scognet zSig = ( zSig + roundIncrement )>>10; 522129203Scognet zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 523129203Scognet if ( zSig == 0 ) zExp = 0; 524129203Scognet return packFloat64( zSign, zExp, zSig ); 525129203Scognet 526129203Scognet} 527129203Scognet 528129203Scognet/* 529129203Scognet------------------------------------------------------------------------------- 530129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 531129203Scognetand significand `zSig', and returns the proper double-precision floating- 532129203Scognetpoint value corresponding to the abstract input. This routine is just like 533129203Scognet`roundAndPackFloat64' except that `zSig' does not have to be normalized. 534129203ScognetBit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 535129203Scognetfloating-point exponent. 536129203Scognet------------------------------------------------------------------------------- 537129203Scognet*/ 538129203Scognetstatic float64 539129203Scognet normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 540129203Scognet{ 541129203Scognet int8 shiftCount; 542129203Scognet 543129203Scognet shiftCount = countLeadingZeros64( zSig ) - 1; 544129203Scognet return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); 545129203Scognet 546129203Scognet} 547129203Scognet 548129203Scognet#ifdef FLOATX80 549129203Scognet 550129203Scognet/* 551129203Scognet------------------------------------------------------------------------------- 552129203ScognetReturns the fraction bits of the extended double-precision floating-point 553129203Scognetvalue `a'. 554129203Scognet------------------------------------------------------------------------------- 555129203Scognet*/ 556129203ScognetINLINE bits64 extractFloatx80Frac( floatx80 a ) 557129203Scognet{ 558129203Scognet 559129203Scognet return a.low; 560129203Scognet 561129203Scognet} 562129203Scognet 563129203Scognet/* 564129203Scognet------------------------------------------------------------------------------- 565129203ScognetReturns the exponent bits of the extended double-precision floating-point 566129203Scognetvalue `a'. 567129203Scognet------------------------------------------------------------------------------- 568129203Scognet*/ 569129203ScognetINLINE int32 extractFloatx80Exp( floatx80 a ) 570129203Scognet{ 571129203Scognet 572129203Scognet return a.high & 0x7FFF; 573129203Scognet 574129203Scognet} 575129203Scognet 576129203Scognet/* 577129203Scognet------------------------------------------------------------------------------- 578129203ScognetReturns the sign bit of the extended double-precision floating-point value 579129203Scognet`a'. 580129203Scognet------------------------------------------------------------------------------- 581129203Scognet*/ 582129203ScognetINLINE flag extractFloatx80Sign( floatx80 a ) 583129203Scognet{ 584129203Scognet 585129203Scognet return a.high>>15; 586129203Scognet 587129203Scognet} 588129203Scognet 589129203Scognet/* 590129203Scognet------------------------------------------------------------------------------- 591129203ScognetNormalizes the subnormal extended double-precision floating-point value 592129203Scognetrepresented by the denormalized significand `aSig'. The normalized exponent 593129203Scognetand significand are stored at the locations pointed to by `zExpPtr' and 594129203Scognet`zSigPtr', respectively. 595129203Scognet------------------------------------------------------------------------------- 596129203Scognet*/ 597129203Scognetstatic void 598129203Scognet normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) 599129203Scognet{ 600129203Scognet int8 shiftCount; 601129203Scognet 602129203Scognet shiftCount = countLeadingZeros64( aSig ); 603129203Scognet *zSigPtr = aSig<<shiftCount; 604129203Scognet *zExpPtr = 1 - shiftCount; 605129203Scognet 606129203Scognet} 607129203Scognet 608129203Scognet/* 609129203Scognet------------------------------------------------------------------------------- 610129203ScognetPacks the sign `zSign', exponent `zExp', and significand `zSig' into an 611129203Scognetextended double-precision floating-point value, returning the result. 612129203Scognet------------------------------------------------------------------------------- 613129203Scognet*/ 614129203ScognetINLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) 615129203Scognet{ 616129203Scognet floatx80 z; 617129203Scognet 618129203Scognet z.low = zSig; 619129203Scognet z.high = ( ( (bits16) zSign )<<15 ) + zExp; 620129203Scognet return z; 621129203Scognet 622129203Scognet} 623129203Scognet 624129203Scognet/* 625129203Scognet------------------------------------------------------------------------------- 626129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 627129203Scognetand extended significand formed by the concatenation of `zSig0' and `zSig1', 628129203Scognetand returns the proper extended double-precision floating-point value 629129203Scognetcorresponding to the abstract input. Ordinarily, the abstract value is 630129203Scognetrounded and packed into the extended double-precision format, with the 631129203Scognetinexact exception raised if the abstract input cannot be represented 632129203Scognetexactly. However, if the abstract value is too large, the overflow and 633129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 634129203Scognetreturned. If the abstract value is too small, the input value is rounded to 635129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 636129203Scognetthe abstract input cannot be represented exactly as a subnormal extended 637129203Scognetdouble-precision floating-point number. 638129203Scognet If `roundingPrecision' is 32 or 64, the result is rounded to the same 639129203Scognetnumber of bits as single or double precision, respectively. Otherwise, the 640129203Scognetresult is rounded to the full precision of the extended double-precision 641129203Scognetformat. 642129203Scognet The input significand must be normalized or smaller. If the input 643129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result 644129203Scognetreturned is a subnormal number, and it must not require rounding. The 645129203Scognethandling of underflow and overflow follows the IEC/IEEE Standard for Binary 646129203ScognetFloating-Point Arithmetic. 647129203Scognet------------------------------------------------------------------------------- 648129203Scognet*/ 649129203Scognetstatic floatx80 650129203Scognet roundAndPackFloatx80( 651129203Scognet int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 652129203Scognet ) 653129203Scognet{ 654129203Scognet int8 roundingMode; 655129203Scognet flag roundNearestEven, increment, isTiny; 656129203Scognet int64 roundIncrement, roundMask, roundBits; 657129203Scognet 658129203Scognet roundingMode = float_rounding_mode; 659129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 660129203Scognet if ( roundingPrecision == 80 ) goto precision80; 661129203Scognet if ( roundingPrecision == 64 ) { 662129203Scognet roundIncrement = LIT64( 0x0000000000000400 ); 663129203Scognet roundMask = LIT64( 0x00000000000007FF ); 664129203Scognet } 665129203Scognet else if ( roundingPrecision == 32 ) { 666129203Scognet roundIncrement = LIT64( 0x0000008000000000 ); 667129203Scognet roundMask = LIT64( 0x000000FFFFFFFFFF ); 668129203Scognet } 669129203Scognet else { 670129203Scognet goto precision80; 671129203Scognet } 672129203Scognet zSig0 |= ( zSig1 != 0 ); 673129203Scognet if ( ! roundNearestEven ) { 674129203Scognet if ( roundingMode == float_round_to_zero ) { 675129203Scognet roundIncrement = 0; 676129203Scognet } 677129203Scognet else { 678129203Scognet roundIncrement = roundMask; 679129203Scognet if ( zSign ) { 680129203Scognet if ( roundingMode == float_round_up ) roundIncrement = 0; 681129203Scognet } 682129203Scognet else { 683129203Scognet if ( roundingMode == float_round_down ) roundIncrement = 0; 684129203Scognet } 685129203Scognet } 686129203Scognet } 687129203Scognet roundBits = zSig0 & roundMask; 688129203Scognet if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 689129203Scognet if ( ( 0x7FFE < zExp ) 690129203Scognet || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 691129203Scognet ) { 692129203Scognet goto overflow; 693129203Scognet } 694129203Scognet if ( zExp <= 0 ) { 695129203Scognet isTiny = 696129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 697129203Scognet || ( zExp < 0 ) 698129203Scognet || ( zSig0 <= zSig0 + roundIncrement ); 699129203Scognet shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 700129203Scognet zExp = 0; 701129203Scognet roundBits = zSig0 & roundMask; 702129203Scognet if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 703129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 704129203Scognet zSig0 += roundIncrement; 705129203Scognet if ( (sbits64) zSig0 < 0 ) zExp = 1; 706129203Scognet roundIncrement = roundMask + 1; 707129203Scognet if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 708129203Scognet roundMask |= roundIncrement; 709129203Scognet } 710129203Scognet zSig0 &= ~ roundMask; 711129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 712129203Scognet } 713129203Scognet } 714129203Scognet if ( roundBits ) float_exception_flags |= float_flag_inexact; 715129203Scognet zSig0 += roundIncrement; 716129203Scognet if ( zSig0 < roundIncrement ) { 717129203Scognet ++zExp; 718129203Scognet zSig0 = LIT64( 0x8000000000000000 ); 719129203Scognet } 720129203Scognet roundIncrement = roundMask + 1; 721129203Scognet if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 722129203Scognet roundMask |= roundIncrement; 723129203Scognet } 724129203Scognet zSig0 &= ~ roundMask; 725129203Scognet if ( zSig0 == 0 ) zExp = 0; 726129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 727129203Scognet precision80: 728129203Scognet increment = ( (sbits64) zSig1 < 0 ); 729129203Scognet if ( ! roundNearestEven ) { 730129203Scognet if ( roundingMode == float_round_to_zero ) { 731129203Scognet increment = 0; 732129203Scognet } 733129203Scognet else { 734129203Scognet if ( zSign ) { 735129203Scognet increment = ( roundingMode == float_round_down ) && zSig1; 736129203Scognet } 737129203Scognet else { 738129203Scognet increment = ( roundingMode == float_round_up ) && zSig1; 739129203Scognet } 740129203Scognet } 741129203Scognet } 742129203Scognet if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 743129203Scognet if ( ( 0x7FFE < zExp ) 744129203Scognet || ( ( zExp == 0x7FFE ) 745129203Scognet && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) 746129203Scognet && increment 747129203Scognet ) 748129203Scognet ) { 749129203Scognet roundMask = 0; 750129203Scognet overflow: 751129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 752129203Scognet if ( ( roundingMode == float_round_to_zero ) 753129203Scognet || ( zSign && ( roundingMode == float_round_up ) ) 754129203Scognet || ( ! zSign && ( roundingMode == float_round_down ) ) 755129203Scognet ) { 756129203Scognet return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 757129203Scognet } 758129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 759129203Scognet } 760129203Scognet if ( zExp <= 0 ) { 761129203Scognet isTiny = 762129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 763129203Scognet || ( zExp < 0 ) 764129203Scognet || ! increment 765129203Scognet || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); 766129203Scognet shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 767129203Scognet zExp = 0; 768129203Scognet if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); 769129203Scognet if ( zSig1 ) float_exception_flags |= float_flag_inexact; 770129203Scognet if ( roundNearestEven ) { 771129203Scognet increment = ( (sbits64) zSig1 < 0 ); 772129203Scognet } 773129203Scognet else { 774129203Scognet if ( zSign ) { 775129203Scognet increment = ( roundingMode == float_round_down ) && zSig1; 776129203Scognet } 777129203Scognet else { 778129203Scognet increment = ( roundingMode == float_round_up ) && zSig1; 779129203Scognet } 780129203Scognet } 781129203Scognet if ( increment ) { 782129203Scognet ++zSig0; 783129203Scognet zSig0 &= 784129203Scognet ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 785129203Scognet if ( (sbits64) zSig0 < 0 ) zExp = 1; 786129203Scognet } 787129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 788129203Scognet } 789129203Scognet } 790129203Scognet if ( zSig1 ) float_exception_flags |= float_flag_inexact; 791129203Scognet if ( increment ) { 792129203Scognet ++zSig0; 793129203Scognet if ( zSig0 == 0 ) { 794129203Scognet ++zExp; 795129203Scognet zSig0 = LIT64( 0x8000000000000000 ); 796129203Scognet } 797129203Scognet else { 798129203Scognet zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 799129203Scognet } 800129203Scognet } 801129203Scognet else { 802129203Scognet if ( zSig0 == 0 ) zExp = 0; 803129203Scognet } 804129203Scognet return packFloatx80( zSign, zExp, zSig0 ); 805129203Scognet 806129203Scognet} 807129203Scognet 808129203Scognet/* 809129203Scognet------------------------------------------------------------------------------- 810129203ScognetTakes an abstract floating-point value having sign `zSign', exponent 811129203Scognet`zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 812129203Scognetand returns the proper extended double-precision floating-point value 813129203Scognetcorresponding to the abstract input. This routine is just like 814129203Scognet`roundAndPackFloatx80' except that the input significand does not have to be 815129203Scognetnormalized. 816129203Scognet------------------------------------------------------------------------------- 817129203Scognet*/ 818129203Scognetstatic floatx80 819129203Scognet normalizeRoundAndPackFloatx80( 820129203Scognet int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 821129203Scognet ) 822129203Scognet{ 823129203Scognet int8 shiftCount; 824129203Scognet 825129203Scognet if ( zSig0 == 0 ) { 826129203Scognet zSig0 = zSig1; 827129203Scognet zSig1 = 0; 828129203Scognet zExp -= 64; 829129203Scognet } 830129203Scognet shiftCount = countLeadingZeros64( zSig0 ); 831129203Scognet shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 832129203Scognet zExp -= shiftCount; 833129203Scognet return 834129203Scognet roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); 835129203Scognet 836129203Scognet} 837129203Scognet 838129203Scognet#endif 839129203Scognet 840129203Scognet#ifdef FLOAT128 841129203Scognet 842129203Scognet/* 843129203Scognet------------------------------------------------------------------------------- 844129203ScognetReturns the least-significant 64 fraction bits of the quadruple-precision 845129203Scognetfloating-point value `a'. 846129203Scognet------------------------------------------------------------------------------- 847129203Scognet*/ 848129203ScognetINLINE bits64 extractFloat128Frac1( float128 a ) 849129203Scognet{ 850129203Scognet 851129203Scognet return a.low; 852129203Scognet 853129203Scognet} 854129203Scognet 855129203Scognet/* 856129203Scognet------------------------------------------------------------------------------- 857129203ScognetReturns the most-significant 48 fraction bits of the quadruple-precision 858129203Scognetfloating-point value `a'. 859129203Scognet------------------------------------------------------------------------------- 860129203Scognet*/ 861129203ScognetINLINE bits64 extractFloat128Frac0( float128 a ) 862129203Scognet{ 863129203Scognet 864129203Scognet return a.high & LIT64( 0x0000FFFFFFFFFFFF ); 865129203Scognet 866129203Scognet} 867129203Scognet 868129203Scognet/* 869129203Scognet------------------------------------------------------------------------------- 870129203ScognetReturns the exponent bits of the quadruple-precision floating-point value 871129203Scognet`a'. 872129203Scognet------------------------------------------------------------------------------- 873129203Scognet*/ 874129203ScognetINLINE int32 extractFloat128Exp( float128 a ) 875129203Scognet{ 876129203Scognet 877129203Scognet return ( a.high>>48 ) & 0x7FFF; 878129203Scognet 879129203Scognet} 880129203Scognet 881129203Scognet/* 882129203Scognet------------------------------------------------------------------------------- 883129203ScognetReturns the sign bit of the quadruple-precision floating-point value `a'. 884129203Scognet------------------------------------------------------------------------------- 885129203Scognet*/ 886129203ScognetINLINE flag extractFloat128Sign( float128 a ) 887129203Scognet{ 888129203Scognet 889129203Scognet return a.high>>63; 890129203Scognet 891129203Scognet} 892129203Scognet 893129203Scognet/* 894129203Scognet------------------------------------------------------------------------------- 895129203ScognetNormalizes the subnormal quadruple-precision floating-point value 896129203Scognetrepresented by the denormalized significand formed by the concatenation of 897129203Scognet`aSig0' and `aSig1'. The normalized exponent is stored at the location 898129203Scognetpointed to by `zExpPtr'. The most significant 49 bits of the normalized 899129203Scognetsignificand are stored at the location pointed to by `zSig0Ptr', and the 900129203Scognetleast significant 64 bits of the normalized significand are stored at the 901129203Scognetlocation pointed to by `zSig1Ptr'. 902129203Scognet------------------------------------------------------------------------------- 903129203Scognet*/ 904129203Scognetstatic void 905129203Scognet normalizeFloat128Subnormal( 906129203Scognet bits64 aSig0, 907129203Scognet bits64 aSig1, 908129203Scognet int32 *zExpPtr, 909129203Scognet bits64 *zSig0Ptr, 910129203Scognet bits64 *zSig1Ptr 911129203Scognet ) 912129203Scognet{ 913129203Scognet int8 shiftCount; 914129203Scognet 915129203Scognet if ( aSig0 == 0 ) { 916129203Scognet shiftCount = countLeadingZeros64( aSig1 ) - 15; 917129203Scognet if ( shiftCount < 0 ) { 918129203Scognet *zSig0Ptr = aSig1>>( - shiftCount ); 919129203Scognet *zSig1Ptr = aSig1<<( shiftCount & 63 ); 920129203Scognet } 921129203Scognet else { 922129203Scognet *zSig0Ptr = aSig1<<shiftCount; 923129203Scognet *zSig1Ptr = 0; 924129203Scognet } 925129203Scognet *zExpPtr = - shiftCount - 63; 926129203Scognet } 927129203Scognet else { 928129203Scognet shiftCount = countLeadingZeros64( aSig0 ) - 15; 929129203Scognet shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 930129203Scognet *zExpPtr = 1 - shiftCount; 931129203Scognet } 932129203Scognet 933129203Scognet} 934129203Scognet 935129203Scognet/* 936129203Scognet------------------------------------------------------------------------------- 937129203ScognetPacks the sign `zSign', the exponent `zExp', and the significand formed 938129203Scognetby the concatenation of `zSig0' and `zSig1' into a quadruple-precision 939129203Scognetfloating-point value, returning the result. After being shifted into the 940129203Scognetproper positions, the three fields `zSign', `zExp', and `zSig0' are simply 941129203Scognetadded together to form the most significant 32 bits of the result. This 942129203Scognetmeans that any integer portion of `zSig0' will be added into the exponent. 943129203ScognetSince a properly normalized significand will have an integer portion equal 944129203Scognetto 1, the `zExp' input should be 1 less than the desired result exponent 945129203Scognetwhenever `zSig0' and `zSig1' concatenated form a complete, normalized 946129203Scognetsignificand. 947129203Scognet------------------------------------------------------------------------------- 948129203Scognet*/ 949129203ScognetINLINE float128 950129203Scognet packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 951129203Scognet{ 952129203Scognet float128 z; 953129203Scognet 954129203Scognet z.low = zSig1; 955129203Scognet z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; 956129203Scognet return z; 957129203Scognet 958129203Scognet} 959129203Scognet 960129203Scognet/* 961129203Scognet------------------------------------------------------------------------------- 962129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 963129203Scognetand extended significand formed by the concatenation of `zSig0', `zSig1', 964129203Scognetand `zSig2', and returns the proper quadruple-precision floating-point value 965129203Scognetcorresponding to the abstract input. Ordinarily, the abstract value is 966129203Scognetsimply rounded and packed into the quadruple-precision format, with the 967129203Scognetinexact exception raised if the abstract input cannot be represented 968129203Scognetexactly. However, if the abstract value is too large, the overflow and 969129203Scognetinexact exceptions are raised and an infinity or maximal finite value is 970129203Scognetreturned. If the abstract value is too small, the input value is rounded to 971129203Scogneta subnormal number, and the underflow and inexact exceptions are raised if 972129203Scognetthe abstract input cannot be represented exactly as a subnormal quadruple- 973129203Scognetprecision floating-point number. 974129203Scognet The input significand must be normalized or smaller. If the input 975129203Scognetsignificand is not normalized, `zExp' must be 0; in that case, the result 976129203Scognetreturned is a subnormal number, and it must not require rounding. In the 977129203Scognetusual case that the input significand is normalized, `zExp' must be 1 less 978129203Scognetthan the ``true'' floating-point exponent. The handling of underflow and 979129203Scognetoverflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 980129203Scognet------------------------------------------------------------------------------- 981129203Scognet*/ 982129203Scognetstatic float128 983129203Scognet roundAndPackFloat128( 984129203Scognet flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) 985129203Scognet{ 986129203Scognet int8 roundingMode; 987129203Scognet flag roundNearestEven, increment, isTiny; 988129203Scognet 989129203Scognet roundingMode = float_rounding_mode; 990129203Scognet roundNearestEven = ( roundingMode == float_round_nearest_even ); 991129203Scognet increment = ( (sbits64) zSig2 < 0 ); 992129203Scognet if ( ! roundNearestEven ) { 993129203Scognet if ( roundingMode == float_round_to_zero ) { 994129203Scognet increment = 0; 995129203Scognet } 996129203Scognet else { 997129203Scognet if ( zSign ) { 998129203Scognet increment = ( roundingMode == float_round_down ) && zSig2; 999129203Scognet } 1000129203Scognet else { 1001129203Scognet increment = ( roundingMode == float_round_up ) && zSig2; 1002129203Scognet } 1003129203Scognet } 1004129203Scognet } 1005129203Scognet if ( 0x7FFD <= (bits32) zExp ) { 1006129203Scognet if ( ( 0x7FFD < zExp ) 1007129203Scognet || ( ( zExp == 0x7FFD ) 1008129203Scognet && eq128( 1009129203Scognet LIT64( 0x0001FFFFFFFFFFFF ), 1010129203Scognet LIT64( 0xFFFFFFFFFFFFFFFF ), 1011129203Scognet zSig0, 1012129203Scognet zSig1 1013129203Scognet ) 1014129203Scognet && increment 1015129203Scognet ) 1016129203Scognet ) { 1017129203Scognet float_raise( float_flag_overflow | float_flag_inexact ); 1018129203Scognet if ( ( roundingMode == float_round_to_zero ) 1019129203Scognet || ( zSign && ( roundingMode == float_round_up ) ) 1020129203Scognet || ( ! zSign && ( roundingMode == float_round_down ) ) 1021129203Scognet ) { 1022129203Scognet return 1023129203Scognet packFloat128( 1024129203Scognet zSign, 1025129203Scognet 0x7FFE, 1026129203Scognet LIT64( 0x0000FFFFFFFFFFFF ), 1027129203Scognet LIT64( 0xFFFFFFFFFFFFFFFF ) 1028129203Scognet ); 1029129203Scognet } 1030129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 1031129203Scognet } 1032129203Scognet if ( zExp < 0 ) { 1033129203Scognet isTiny = 1034129203Scognet ( float_detect_tininess == float_tininess_before_rounding ) 1035129203Scognet || ( zExp < -1 ) 1036129203Scognet || ! increment 1037129203Scognet || lt128( 1038129203Scognet zSig0, 1039129203Scognet zSig1, 1040129203Scognet LIT64( 0x0001FFFFFFFFFFFF ), 1041129203Scognet LIT64( 0xFFFFFFFFFFFFFFFF ) 1042129203Scognet ); 1043129203Scognet shift128ExtraRightJamming( 1044129203Scognet zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 1045129203Scognet zExp = 0; 1046129203Scognet if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 1047129203Scognet if ( roundNearestEven ) { 1048129203Scognet increment = ( (sbits64) zSig2 < 0 ); 1049129203Scognet } 1050129203Scognet else { 1051129203Scognet if ( zSign ) { 1052129203Scognet increment = ( roundingMode == float_round_down ) && zSig2; 1053129203Scognet } 1054129203Scognet else { 1055129203Scognet increment = ( roundingMode == float_round_up ) && zSig2; 1056129203Scognet } 1057129203Scognet } 1058129203Scognet } 1059129203Scognet } 1060129203Scognet if ( zSig2 ) float_exception_flags |= float_flag_inexact; 1061129203Scognet if ( increment ) { 1062129203Scognet add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 1063129203Scognet zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 1064129203Scognet } 1065129203Scognet else { 1066129203Scognet if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 1067129203Scognet } 1068129203Scognet return packFloat128( zSign, zExp, zSig0, zSig1 ); 1069129203Scognet 1070129203Scognet} 1071129203Scognet 1072129203Scognet/* 1073129203Scognet------------------------------------------------------------------------------- 1074129203ScognetTakes an abstract floating-point value having sign `zSign', exponent `zExp', 1075129203Scognetand significand formed by the concatenation of `zSig0' and `zSig1', and 1076129203Scognetreturns the proper quadruple-precision floating-point value corresponding 1077129203Scognetto the abstract input. This routine is just like `roundAndPackFloat128' 1078129203Scognetexcept that the input significand has fewer bits and does not have to be 1079129203Scognetnormalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 1080129203Scognetpoint exponent. 1081129203Scognet------------------------------------------------------------------------------- 1082129203Scognet*/ 1083129203Scognetstatic float128 1084129203Scognet normalizeRoundAndPackFloat128( 1085129203Scognet flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 1086129203Scognet{ 1087129203Scognet int8 shiftCount; 1088129203Scognet bits64 zSig2; 1089129203Scognet 1090129203Scognet if ( zSig0 == 0 ) { 1091129203Scognet zSig0 = zSig1; 1092129203Scognet zSig1 = 0; 1093129203Scognet zExp -= 64; 1094129203Scognet } 1095129203Scognet shiftCount = countLeadingZeros64( zSig0 ) - 15; 1096129203Scognet if ( 0 <= shiftCount ) { 1097129203Scognet zSig2 = 0; 1098129203Scognet shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1099129203Scognet } 1100129203Scognet else { 1101129203Scognet shift128ExtraRightJamming( 1102129203Scognet zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 1103129203Scognet } 1104129203Scognet zExp -= shiftCount; 1105129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 1106129203Scognet 1107129203Scognet} 1108129203Scognet 1109129203Scognet#endif 1110129203Scognet 1111129203Scognet/* 1112129203Scognet------------------------------------------------------------------------------- 1113129203ScognetReturns the result of converting the 32-bit two's complement integer `a' 1114129203Scognetto the single-precision floating-point format. The conversion is performed 1115129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1116129203Scognet------------------------------------------------------------------------------- 1117129203Scognet*/ 1118129203Scognetfloat32 int32_to_float32( int32 a ) 1119129203Scognet{ 1120129203Scognet flag zSign; 1121129203Scognet 1122129203Scognet if ( a == 0 ) return 0; 1123129203Scognet if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 1124129203Scognet zSign = ( a < 0 ); 1125129203Scognet return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); 1126129203Scognet 1127129203Scognet} 1128129203Scognet 1129129203Scognet/* 1130129203Scognet------------------------------------------------------------------------------- 1131129203ScognetReturns the result of converting the 32-bit two's complement integer `a' 1132129203Scognetto the double-precision floating-point format. The conversion is performed 1133129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1134129203Scognet------------------------------------------------------------------------------- 1135129203Scognet*/ 1136129203Scognetfloat64 int32_to_float64( int32 a ) 1137129203Scognet{ 1138129203Scognet flag zSign; 1139129203Scognet uint32 absA; 1140129203Scognet int8 shiftCount; 1141129203Scognet bits64 zSig; 1142129203Scognet 1143129203Scognet if ( a == 0 ) return 0; 1144129203Scognet zSign = ( a < 0 ); 1145129203Scognet absA = zSign ? - a : a; 1146129203Scognet shiftCount = countLeadingZeros32( absA ) + 21; 1147129203Scognet zSig = absA; 1148129203Scognet return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount ); 1149129203Scognet 1150129203Scognet} 1151129203Scognet 1152129203Scognet#ifdef FLOATX80 1153129203Scognet 1154129203Scognet/* 1155129203Scognet------------------------------------------------------------------------------- 1156129203ScognetReturns the result of converting the 32-bit two's complement integer `a' 1157129203Scognetto the extended double-precision floating-point format. The conversion 1158129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 1159129203ScognetArithmetic. 1160129203Scognet------------------------------------------------------------------------------- 1161129203Scognet*/ 1162129203Scognetfloatx80 int32_to_floatx80( int32 a ) 1163129203Scognet{ 1164129203Scognet flag zSign; 1165129203Scognet uint32 absA; 1166129203Scognet int8 shiftCount; 1167129203Scognet bits64 zSig; 1168129203Scognet 1169129203Scognet if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1170129203Scognet zSign = ( a < 0 ); 1171129203Scognet absA = zSign ? - a : a; 1172129203Scognet shiftCount = countLeadingZeros32( absA ) + 32; 1173129203Scognet zSig = absA; 1174129203Scognet return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 1175129203Scognet 1176129203Scognet} 1177129203Scognet 1178129203Scognet#endif 1179129203Scognet 1180129203Scognet#ifdef FLOAT128 1181129203Scognet 1182129203Scognet/* 1183129203Scognet------------------------------------------------------------------------------- 1184129203ScognetReturns the result of converting the 32-bit two's complement integer `a' to 1185129203Scognetthe quadruple-precision floating-point format. The conversion is performed 1186129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1187129203Scognet------------------------------------------------------------------------------- 1188129203Scognet*/ 1189129203Scognetfloat128 int32_to_float128( int32 a ) 1190129203Scognet{ 1191129203Scognet flag zSign; 1192129203Scognet uint32 absA; 1193129203Scognet int8 shiftCount; 1194129203Scognet bits64 zSig0; 1195129203Scognet 1196129203Scognet if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1197129203Scognet zSign = ( a < 0 ); 1198129203Scognet absA = zSign ? - a : a; 1199129203Scognet shiftCount = countLeadingZeros32( absA ) + 17; 1200129203Scognet zSig0 = absA; 1201129203Scognet return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); 1202129203Scognet 1203129203Scognet} 1204129203Scognet 1205129203Scognet#endif 1206129203Scognet 1207129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */ 1208129203Scognet/* 1209129203Scognet------------------------------------------------------------------------------- 1210129203ScognetReturns the result of converting the 64-bit two's complement integer `a' 1211129203Scognetto the single-precision floating-point format. The conversion is performed 1212129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1213129203Scognet------------------------------------------------------------------------------- 1214129203Scognet*/ 1215129203Scognetfloat32 int64_to_float32( int64 a ) 1216129203Scognet{ 1217129203Scognet flag zSign; 1218129203Scognet uint64 absA; 1219129203Scognet int8 shiftCount; 1220129203Scognet 1221129203Scognet if ( a == 0 ) return 0; 1222129203Scognet zSign = ( a < 0 ); 1223129203Scognet absA = zSign ? - a : a; 1224129203Scognet shiftCount = countLeadingZeros64( absA ) - 40; 1225129203Scognet if ( 0 <= shiftCount ) { 1226129203Scognet return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount ); 1227129203Scognet } 1228129203Scognet else { 1229129203Scognet shiftCount += 7; 1230129203Scognet if ( shiftCount < 0 ) { 1231129203Scognet shift64RightJamming( absA, - shiftCount, &absA ); 1232129203Scognet } 1233129203Scognet else { 1234129203Scognet absA <<= shiftCount; 1235129203Scognet } 1236129203Scognet return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA ); 1237129203Scognet } 1238129203Scognet 1239129203Scognet} 1240129203Scognet 1241129203Scognet/* 1242129203Scognet------------------------------------------------------------------------------- 1243129203ScognetReturns the result of converting the 64-bit two's complement integer `a' 1244129203Scognetto the double-precision floating-point format. The conversion is performed 1245129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1246129203Scognet------------------------------------------------------------------------------- 1247129203Scognet*/ 1248129203Scognetfloat64 int64_to_float64( int64 a ) 1249129203Scognet{ 1250129203Scognet flag zSign; 1251129203Scognet 1252129203Scognet if ( a == 0 ) return 0; 1253129203Scognet if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) { 1254129203Scognet return packFloat64( 1, 0x43E, 0 ); 1255129203Scognet } 1256129203Scognet zSign = ( a < 0 ); 1257129203Scognet return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a ); 1258129203Scognet 1259129203Scognet} 1260129203Scognet 1261129203Scognet#ifdef FLOATX80 1262129203Scognet 1263129203Scognet/* 1264129203Scognet------------------------------------------------------------------------------- 1265129203ScognetReturns the result of converting the 64-bit two's complement integer `a' 1266129203Scognetto the extended double-precision floating-point format. The conversion 1267129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 1268129203ScognetArithmetic. 1269129203Scognet------------------------------------------------------------------------------- 1270129203Scognet*/ 1271129203Scognetfloatx80 int64_to_floatx80( int64 a ) 1272129203Scognet{ 1273129203Scognet flag zSign; 1274129203Scognet uint64 absA; 1275129203Scognet int8 shiftCount; 1276129203Scognet 1277129203Scognet if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1278129203Scognet zSign = ( a < 0 ); 1279129203Scognet absA = zSign ? - a : a; 1280129203Scognet shiftCount = countLeadingZeros64( absA ); 1281129203Scognet return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount ); 1282129203Scognet 1283129203Scognet} 1284129203Scognet 1285129203Scognet#endif 1286129203Scognet 1287129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1288129203Scognet 1289129203Scognet#ifdef FLOAT128 1290129203Scognet 1291129203Scognet/* 1292129203Scognet------------------------------------------------------------------------------- 1293129203ScognetReturns the result of converting the 64-bit two's complement integer `a' to 1294129203Scognetthe quadruple-precision floating-point format. The conversion is performed 1295129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1296129203Scognet------------------------------------------------------------------------------- 1297129203Scognet*/ 1298129203Scognetfloat128 int64_to_float128( int64 a ) 1299129203Scognet{ 1300129203Scognet flag zSign; 1301129203Scognet uint64 absA; 1302129203Scognet int8 shiftCount; 1303129203Scognet int32 zExp; 1304129203Scognet bits64 zSig0, zSig1; 1305129203Scognet 1306129203Scognet if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1307129203Scognet zSign = ( a < 0 ); 1308129203Scognet absA = zSign ? - a : a; 1309129203Scognet shiftCount = countLeadingZeros64( absA ) + 49; 1310129203Scognet zExp = 0x406E - shiftCount; 1311129203Scognet if ( 64 <= shiftCount ) { 1312129203Scognet zSig1 = 0; 1313129203Scognet zSig0 = absA; 1314129203Scognet shiftCount -= 64; 1315129203Scognet } 1316129203Scognet else { 1317129203Scognet zSig1 = absA; 1318129203Scognet zSig0 = 0; 1319129203Scognet } 1320129203Scognet shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1321129203Scognet return packFloat128( zSign, zExp, zSig0, zSig1 ); 1322129203Scognet 1323129203Scognet} 1324129203Scognet 1325129203Scognet#endif 1326129203Scognet 1327129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1328129203Scognet/* 1329129203Scognet------------------------------------------------------------------------------- 1330129203ScognetReturns the result of converting the single-precision floating-point value 1331129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 1332129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1333129203ScognetArithmetic---which means in particular that the conversion is rounded 1334129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 1335129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 1336129203Scognetlargest integer with the same sign as `a' is returned. 1337129203Scognet------------------------------------------------------------------------------- 1338129203Scognet*/ 1339129203Scognetint32 float32_to_int32( float32 a ) 1340129203Scognet{ 1341129203Scognet flag aSign; 1342129203Scognet int16 aExp, shiftCount; 1343129203Scognet bits32 aSig; 1344129203Scognet bits64 aSig64; 1345129203Scognet 1346129203Scognet aSig = extractFloat32Frac( a ); 1347129203Scognet aExp = extractFloat32Exp( a ); 1348129203Scognet aSign = extractFloat32Sign( a ); 1349129203Scognet if ( ( aExp == 0xFF ) && aSig ) aSign = 0; 1350129203Scognet if ( aExp ) aSig |= 0x00800000; 1351129203Scognet shiftCount = 0xAF - aExp; 1352129203Scognet aSig64 = aSig; 1353129203Scognet aSig64 <<= 32; 1354129203Scognet if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); 1355129203Scognet return roundAndPackInt32( aSign, aSig64 ); 1356129203Scognet 1357129203Scognet} 1358129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1359129203Scognet 1360129203Scognet/* 1361129203Scognet------------------------------------------------------------------------------- 1362129203ScognetReturns the result of converting the single-precision floating-point value 1363129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 1364129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1365129203ScognetArithmetic, except that the conversion is always rounded toward zero. 1366129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 1367129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 1368129203Scognetreturned. 1369129203Scognet------------------------------------------------------------------------------- 1370129203Scognet*/ 1371129203Scognetint32 float32_to_int32_round_to_zero( float32 a ) 1372129203Scognet{ 1373129203Scognet flag aSign; 1374129203Scognet int16 aExp, shiftCount; 1375129203Scognet bits32 aSig; 1376129203Scognet int32 z; 1377129203Scognet 1378129203Scognet aSig = extractFloat32Frac( a ); 1379129203Scognet aExp = extractFloat32Exp( a ); 1380129203Scognet aSign = extractFloat32Sign( a ); 1381129203Scognet shiftCount = aExp - 0x9E; 1382129203Scognet if ( 0 <= shiftCount ) { 1383129203Scognet if ( a != 0xCF000000 ) { 1384129203Scognet float_raise( float_flag_invalid ); 1385129203Scognet if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 1386129203Scognet } 1387129203Scognet return (sbits32) 0x80000000; 1388129203Scognet } 1389129203Scognet else if ( aExp <= 0x7E ) { 1390129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 1391129203Scognet return 0; 1392129203Scognet } 1393129203Scognet aSig = ( aSig | 0x00800000 )<<8; 1394129203Scognet z = aSig>>( - shiftCount ); 1395129203Scognet if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 1396129203Scognet float_exception_flags |= float_flag_inexact; 1397129203Scognet } 1398129203Scognet if ( aSign ) z = - z; 1399129203Scognet return z; 1400129203Scognet 1401129203Scognet} 1402129203Scognet 1403129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ 1404129203Scognet/* 1405129203Scognet------------------------------------------------------------------------------- 1406129203ScognetReturns the result of converting the single-precision floating-point value 1407129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 1408129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1409129203ScognetArithmetic---which means in particular that the conversion is rounded 1410129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 1411129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 1412129203Scognetlargest integer with the same sign as `a' is returned. 1413129203Scognet------------------------------------------------------------------------------- 1414129203Scognet*/ 1415129203Scognetint64 float32_to_int64( float32 a ) 1416129203Scognet{ 1417129203Scognet flag aSign; 1418129203Scognet int16 aExp, shiftCount; 1419129203Scognet bits32 aSig; 1420129203Scognet bits64 aSig64, aSigExtra; 1421129203Scognet 1422129203Scognet aSig = extractFloat32Frac( a ); 1423129203Scognet aExp = extractFloat32Exp( a ); 1424129203Scognet aSign = extractFloat32Sign( a ); 1425129203Scognet shiftCount = 0xBE - aExp; 1426129203Scognet if ( shiftCount < 0 ) { 1427129203Scognet float_raise( float_flag_invalid ); 1428129203Scognet if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1429129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 1430129203Scognet } 1431129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 1432129203Scognet } 1433129203Scognet if ( aExp ) aSig |= 0x00800000; 1434129203Scognet aSig64 = aSig; 1435129203Scognet aSig64 <<= 40; 1436129203Scognet shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); 1437129203Scognet return roundAndPackInt64( aSign, aSig64, aSigExtra ); 1438129203Scognet 1439129203Scognet} 1440129203Scognet 1441129203Scognet/* 1442129203Scognet------------------------------------------------------------------------------- 1443129203ScognetReturns the result of converting the single-precision floating-point value 1444129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 1445129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1446129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 1447129203Scognet`a' is a NaN, the largest positive integer is returned. Otherwise, if the 1448129203Scognetconversion overflows, the largest integer with the same sign as `a' is 1449129203Scognetreturned. 1450129203Scognet------------------------------------------------------------------------------- 1451129203Scognet*/ 1452129203Scognetint64 float32_to_int64_round_to_zero( float32 a ) 1453129203Scognet{ 1454129203Scognet flag aSign; 1455129203Scognet int16 aExp, shiftCount; 1456129203Scognet bits32 aSig; 1457129203Scognet bits64 aSig64; 1458129203Scognet int64 z; 1459129203Scognet 1460129203Scognet aSig = extractFloat32Frac( a ); 1461129203Scognet aExp = extractFloat32Exp( a ); 1462129203Scognet aSign = extractFloat32Sign( a ); 1463129203Scognet shiftCount = aExp - 0xBE; 1464129203Scognet if ( 0 <= shiftCount ) { 1465129203Scognet if ( a != 0xDF000000 ) { 1466129203Scognet float_raise( float_flag_invalid ); 1467129203Scognet if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1468129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 1469129203Scognet } 1470129203Scognet } 1471129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 1472129203Scognet } 1473129203Scognet else if ( aExp <= 0x7E ) { 1474129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 1475129203Scognet return 0; 1476129203Scognet } 1477129203Scognet aSig64 = aSig | 0x00800000; 1478129203Scognet aSig64 <<= 40; 1479129203Scognet z = aSig64>>( - shiftCount ); 1480129203Scognet if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { 1481129203Scognet float_exception_flags |= float_flag_inexact; 1482129203Scognet } 1483129203Scognet if ( aSign ) z = - z; 1484129203Scognet return z; 1485129203Scognet 1486129203Scognet} 1487129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1488129203Scognet 1489129203Scognet/* 1490129203Scognet------------------------------------------------------------------------------- 1491129203ScognetReturns the result of converting the single-precision floating-point value 1492129203Scognet`a' to the double-precision floating-point format. The conversion is 1493129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1494129203ScognetArithmetic. 1495129203Scognet------------------------------------------------------------------------------- 1496129203Scognet*/ 1497129203Scognetfloat64 float32_to_float64( float32 a ) 1498129203Scognet{ 1499129203Scognet flag aSign; 1500129203Scognet int16 aExp; 1501129203Scognet bits32 aSig; 1502129203Scognet 1503129203Scognet aSig = extractFloat32Frac( a ); 1504129203Scognet aExp = extractFloat32Exp( a ); 1505129203Scognet aSign = extractFloat32Sign( a ); 1506129203Scognet if ( aExp == 0xFF ) { 1507129203Scognet if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 1508129203Scognet return packFloat64( aSign, 0x7FF, 0 ); 1509129203Scognet } 1510129203Scognet if ( aExp == 0 ) { 1511129203Scognet if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); 1512129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1513129203Scognet --aExp; 1514129203Scognet } 1515129203Scognet return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); 1516129203Scognet 1517129203Scognet} 1518129203Scognet 1519129203Scognet#ifdef FLOATX80 1520129203Scognet 1521129203Scognet/* 1522129203Scognet------------------------------------------------------------------------------- 1523129203ScognetReturns the result of converting the single-precision floating-point value 1524129203Scognet`a' to the extended double-precision floating-point format. The conversion 1525129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 1526129203ScognetArithmetic. 1527129203Scognet------------------------------------------------------------------------------- 1528129203Scognet*/ 1529129203Scognetfloatx80 float32_to_floatx80( float32 a ) 1530129203Scognet{ 1531129203Scognet flag aSign; 1532129203Scognet int16 aExp; 1533129203Scognet bits32 aSig; 1534129203Scognet 1535129203Scognet aSig = extractFloat32Frac( a ); 1536129203Scognet aExp = extractFloat32Exp( a ); 1537129203Scognet aSign = extractFloat32Sign( a ); 1538129203Scognet if ( aExp == 0xFF ) { 1539129203Scognet if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); 1540129203Scognet return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 1541129203Scognet } 1542129203Scognet if ( aExp == 0 ) { 1543129203Scognet if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 1544129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1545129203Scognet } 1546129203Scognet aSig |= 0x00800000; 1547129203Scognet return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); 1548129203Scognet 1549129203Scognet} 1550129203Scognet 1551129203Scognet#endif 1552129203Scognet 1553129203Scognet#ifdef FLOAT128 1554129203Scognet 1555129203Scognet/* 1556129203Scognet------------------------------------------------------------------------------- 1557129203ScognetReturns the result of converting the single-precision floating-point value 1558129203Scognet`a' to the double-precision floating-point format. The conversion is 1559129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 1560129203ScognetArithmetic. 1561129203Scognet------------------------------------------------------------------------------- 1562129203Scognet*/ 1563129203Scognetfloat128 float32_to_float128( float32 a ) 1564129203Scognet{ 1565129203Scognet flag aSign; 1566129203Scognet int16 aExp; 1567129203Scognet bits32 aSig; 1568129203Scognet 1569129203Scognet aSig = extractFloat32Frac( a ); 1570129203Scognet aExp = extractFloat32Exp( a ); 1571129203Scognet aSign = extractFloat32Sign( a ); 1572129203Scognet if ( aExp == 0xFF ) { 1573129203Scognet if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); 1574129203Scognet return packFloat128( aSign, 0x7FFF, 0, 0 ); 1575129203Scognet } 1576129203Scognet if ( aExp == 0 ) { 1577129203Scognet if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); 1578129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1579129203Scognet --aExp; 1580129203Scognet } 1581129203Scognet return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); 1582129203Scognet 1583129203Scognet} 1584129203Scognet 1585129203Scognet#endif 1586129203Scognet 1587129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1588129203Scognet/* 1589129203Scognet------------------------------------------------------------------------------- 1590129203ScognetRounds the single-precision floating-point value `a' to an integer, and 1591129203Scognetreturns the result as a single-precision floating-point value. The 1592129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary 1593129203ScognetFloating-Point Arithmetic. 1594129203Scognet------------------------------------------------------------------------------- 1595129203Scognet*/ 1596129203Scognetfloat32 float32_round_to_int( float32 a ) 1597129203Scognet{ 1598129203Scognet flag aSign; 1599129203Scognet int16 aExp; 1600129203Scognet bits32 lastBitMask, roundBitsMask; 1601129203Scognet int8 roundingMode; 1602129203Scognet float32 z; 1603129203Scognet 1604129203Scognet aExp = extractFloat32Exp( a ); 1605129203Scognet if ( 0x96 <= aExp ) { 1606129203Scognet if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 1607129203Scognet return propagateFloat32NaN( a, a ); 1608129203Scognet } 1609129203Scognet return a; 1610129203Scognet } 1611129203Scognet if ( aExp <= 0x7E ) { 1612129203Scognet if ( (bits32) ( a<<1 ) == 0 ) return a; 1613129203Scognet float_exception_flags |= float_flag_inexact; 1614129203Scognet aSign = extractFloat32Sign( a ); 1615129203Scognet switch ( float_rounding_mode ) { 1616129203Scognet case float_round_nearest_even: 1617129203Scognet if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 1618129203Scognet return packFloat32( aSign, 0x7F, 0 ); 1619129203Scognet } 1620129203Scognet break; 1621129203Scognet case float_round_to_zero: 1622129203Scognet break; 1623129203Scognet case float_round_down: 1624129203Scognet return aSign ? 0xBF800000 : 0; 1625129203Scognet case float_round_up: 1626129203Scognet return aSign ? 0x80000000 : 0x3F800000; 1627129203Scognet } 1628129203Scognet return packFloat32( aSign, 0, 0 ); 1629129203Scognet } 1630129203Scognet lastBitMask = 1; 1631129203Scognet lastBitMask <<= 0x96 - aExp; 1632129203Scognet roundBitsMask = lastBitMask - 1; 1633129203Scognet z = a; 1634129203Scognet roundingMode = float_rounding_mode; 1635129203Scognet if ( roundingMode == float_round_nearest_even ) { 1636129203Scognet z += lastBitMask>>1; 1637129203Scognet if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 1638129203Scognet } 1639129203Scognet else if ( roundingMode != float_round_to_zero ) { 1640129203Scognet if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 1641129203Scognet z += roundBitsMask; 1642129203Scognet } 1643129203Scognet } 1644129203Scognet z &= ~ roundBitsMask; 1645129203Scognet if ( z != a ) float_exception_flags |= float_flag_inexact; 1646129203Scognet return z; 1647129203Scognet 1648129203Scognet} 1649129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 1650129203Scognet 1651129203Scognet/* 1652129203Scognet------------------------------------------------------------------------------- 1653129203ScognetReturns the result of adding the absolute values of the single-precision 1654129203Scognetfloating-point values `a' and `b'. If `zSign' is 1, the sum is negated 1655129203Scognetbefore being returned. `zSign' is ignored if the result is a NaN. 1656129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 1657129203ScognetFloating-Point Arithmetic. 1658129203Scognet------------------------------------------------------------------------------- 1659129203Scognet*/ 1660129203Scognetstatic float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) 1661129203Scognet{ 1662129203Scognet int16 aExp, bExp, zExp; 1663129203Scognet bits32 aSig, bSig, zSig; 1664129203Scognet int16 expDiff; 1665129203Scognet 1666129203Scognet aSig = extractFloat32Frac( a ); 1667129203Scognet aExp = extractFloat32Exp( a ); 1668129203Scognet bSig = extractFloat32Frac( b ); 1669129203Scognet bExp = extractFloat32Exp( b ); 1670129203Scognet expDiff = aExp - bExp; 1671129203Scognet aSig <<= 6; 1672129203Scognet bSig <<= 6; 1673129203Scognet if ( 0 < expDiff ) { 1674129203Scognet if ( aExp == 0xFF ) { 1675129203Scognet if ( aSig ) return propagateFloat32NaN( a, b ); 1676129203Scognet return a; 1677129203Scognet } 1678129203Scognet if ( bExp == 0 ) { 1679129203Scognet --expDiff; 1680129203Scognet } 1681129203Scognet else { 1682129203Scognet bSig |= 0x20000000; 1683129203Scognet } 1684129203Scognet shift32RightJamming( bSig, expDiff, &bSig ); 1685129203Scognet zExp = aExp; 1686129203Scognet } 1687129203Scognet else if ( expDiff < 0 ) { 1688129203Scognet if ( bExp == 0xFF ) { 1689129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1690129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1691129203Scognet } 1692129203Scognet if ( aExp == 0 ) { 1693129203Scognet ++expDiff; 1694129203Scognet } 1695129203Scognet else { 1696129203Scognet aSig |= 0x20000000; 1697129203Scognet } 1698129203Scognet shift32RightJamming( aSig, - expDiff, &aSig ); 1699129203Scognet zExp = bExp; 1700129203Scognet } 1701129203Scognet else { 1702129203Scognet if ( aExp == 0xFF ) { 1703129203Scognet if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 1704129203Scognet return a; 1705129203Scognet } 1706129203Scognet if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 1707129203Scognet zSig = 0x40000000 + aSig + bSig; 1708129203Scognet zExp = aExp; 1709129203Scognet goto roundAndPack; 1710129203Scognet } 1711129203Scognet aSig |= 0x20000000; 1712129203Scognet zSig = ( aSig + bSig )<<1; 1713129203Scognet --zExp; 1714129203Scognet if ( (sbits32) zSig < 0 ) { 1715129203Scognet zSig = aSig + bSig; 1716129203Scognet ++zExp; 1717129203Scognet } 1718129203Scognet roundAndPack: 1719129203Scognet return roundAndPackFloat32( zSign, zExp, zSig ); 1720129203Scognet 1721129203Scognet} 1722129203Scognet 1723129203Scognet/* 1724129203Scognet------------------------------------------------------------------------------- 1725129203ScognetReturns the result of subtracting the absolute values of the single- 1726129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the 1727129203Scognetdifference is negated before being returned. `zSign' is ignored if the 1728129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 1729129203ScognetStandard for Binary Floating-Point Arithmetic. 1730129203Scognet------------------------------------------------------------------------------- 1731129203Scognet*/ 1732129203Scognetstatic float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) 1733129203Scognet{ 1734129203Scognet int16 aExp, bExp, zExp; 1735129203Scognet bits32 aSig, bSig, zSig; 1736129203Scognet int16 expDiff; 1737129203Scognet 1738129203Scognet aSig = extractFloat32Frac( a ); 1739129203Scognet aExp = extractFloat32Exp( a ); 1740129203Scognet bSig = extractFloat32Frac( b ); 1741129203Scognet bExp = extractFloat32Exp( b ); 1742129203Scognet expDiff = aExp - bExp; 1743129203Scognet aSig <<= 7; 1744129203Scognet bSig <<= 7; 1745129203Scognet if ( 0 < expDiff ) goto aExpBigger; 1746129203Scognet if ( expDiff < 0 ) goto bExpBigger; 1747129203Scognet if ( aExp == 0xFF ) { 1748129203Scognet if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 1749129203Scognet float_raise( float_flag_invalid ); 1750129203Scognet return float32_default_nan; 1751129203Scognet } 1752129203Scognet if ( aExp == 0 ) { 1753129203Scognet aExp = 1; 1754129203Scognet bExp = 1; 1755129203Scognet } 1756129203Scognet if ( bSig < aSig ) goto aBigger; 1757129203Scognet if ( aSig < bSig ) goto bBigger; 1758129203Scognet return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); 1759129203Scognet bExpBigger: 1760129203Scognet if ( bExp == 0xFF ) { 1761129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1762129203Scognet return packFloat32( zSign ^ 1, 0xFF, 0 ); 1763129203Scognet } 1764129203Scognet if ( aExp == 0 ) { 1765129203Scognet ++expDiff; 1766129203Scognet } 1767129203Scognet else { 1768129203Scognet aSig |= 0x40000000; 1769129203Scognet } 1770129203Scognet shift32RightJamming( aSig, - expDiff, &aSig ); 1771129203Scognet bSig |= 0x40000000; 1772129203Scognet bBigger: 1773129203Scognet zSig = bSig - aSig; 1774129203Scognet zExp = bExp; 1775129203Scognet zSign ^= 1; 1776129203Scognet goto normalizeRoundAndPack; 1777129203Scognet aExpBigger: 1778129203Scognet if ( aExp == 0xFF ) { 1779129203Scognet if ( aSig ) return propagateFloat32NaN( a, b ); 1780129203Scognet return a; 1781129203Scognet } 1782129203Scognet if ( bExp == 0 ) { 1783129203Scognet --expDiff; 1784129203Scognet } 1785129203Scognet else { 1786129203Scognet bSig |= 0x40000000; 1787129203Scognet } 1788129203Scognet shift32RightJamming( bSig, expDiff, &bSig ); 1789129203Scognet aSig |= 0x40000000; 1790129203Scognet aBigger: 1791129203Scognet zSig = aSig - bSig; 1792129203Scognet zExp = aExp; 1793129203Scognet normalizeRoundAndPack: 1794129203Scognet --zExp; 1795129203Scognet return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); 1796129203Scognet 1797129203Scognet} 1798129203Scognet 1799129203Scognet/* 1800129203Scognet------------------------------------------------------------------------------- 1801129203ScognetReturns the result of adding the single-precision floating-point values `a' 1802129203Scognetand `b'. The operation is performed according to the IEC/IEEE Standard for 1803129203ScognetBinary Floating-Point Arithmetic. 1804129203Scognet------------------------------------------------------------------------------- 1805129203Scognet*/ 1806129203Scognetfloat32 float32_add( float32 a, float32 b ) 1807129203Scognet{ 1808129203Scognet flag aSign, bSign; 1809129203Scognet 1810129203Scognet aSign = extractFloat32Sign( a ); 1811129203Scognet bSign = extractFloat32Sign( b ); 1812129203Scognet if ( aSign == bSign ) { 1813129203Scognet return addFloat32Sigs( a, b, aSign ); 1814129203Scognet } 1815129203Scognet else { 1816129203Scognet return subFloat32Sigs( a, b, aSign ); 1817129203Scognet } 1818129203Scognet 1819129203Scognet} 1820129203Scognet 1821129203Scognet/* 1822129203Scognet------------------------------------------------------------------------------- 1823129203ScognetReturns the result of subtracting the single-precision floating-point values 1824129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 1825129203Scognetfor Binary Floating-Point Arithmetic. 1826129203Scognet------------------------------------------------------------------------------- 1827129203Scognet*/ 1828129203Scognetfloat32 float32_sub( float32 a, float32 b ) 1829129203Scognet{ 1830129203Scognet flag aSign, bSign; 1831129203Scognet 1832129203Scognet aSign = extractFloat32Sign( a ); 1833129203Scognet bSign = extractFloat32Sign( b ); 1834129203Scognet if ( aSign == bSign ) { 1835129203Scognet return subFloat32Sigs( a, b, aSign ); 1836129203Scognet } 1837129203Scognet else { 1838129203Scognet return addFloat32Sigs( a, b, aSign ); 1839129203Scognet } 1840129203Scognet 1841129203Scognet} 1842129203Scognet 1843129203Scognet/* 1844129203Scognet------------------------------------------------------------------------------- 1845129203ScognetReturns the result of multiplying the single-precision floating-point values 1846129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 1847129203Scognetfor Binary Floating-Point Arithmetic. 1848129203Scognet------------------------------------------------------------------------------- 1849129203Scognet*/ 1850129203Scognetfloat32 float32_mul( float32 a, float32 b ) 1851129203Scognet{ 1852129203Scognet flag aSign, bSign, zSign; 1853129203Scognet int16 aExp, bExp, zExp; 1854129203Scognet bits32 aSig, bSig; 1855129203Scognet bits64 zSig64; 1856129203Scognet bits32 zSig; 1857129203Scognet 1858129203Scognet aSig = extractFloat32Frac( a ); 1859129203Scognet aExp = extractFloat32Exp( a ); 1860129203Scognet aSign = extractFloat32Sign( a ); 1861129203Scognet bSig = extractFloat32Frac( b ); 1862129203Scognet bExp = extractFloat32Exp( b ); 1863129203Scognet bSign = extractFloat32Sign( b ); 1864129203Scognet zSign = aSign ^ bSign; 1865129203Scognet if ( aExp == 0xFF ) { 1866129203Scognet if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1867129203Scognet return propagateFloat32NaN( a, b ); 1868129203Scognet } 1869129203Scognet if ( ( bExp | bSig ) == 0 ) { 1870129203Scognet float_raise( float_flag_invalid ); 1871129203Scognet return float32_default_nan; 1872129203Scognet } 1873129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1874129203Scognet } 1875129203Scognet if ( bExp == 0xFF ) { 1876129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1877129203Scognet if ( ( aExp | aSig ) == 0 ) { 1878129203Scognet float_raise( float_flag_invalid ); 1879129203Scognet return float32_default_nan; 1880129203Scognet } 1881129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1882129203Scognet } 1883129203Scognet if ( aExp == 0 ) { 1884129203Scognet if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1885129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1886129203Scognet } 1887129203Scognet if ( bExp == 0 ) { 1888129203Scognet if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 1889129203Scognet normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1890129203Scognet } 1891129203Scognet zExp = aExp + bExp - 0x7F; 1892129203Scognet aSig = ( aSig | 0x00800000 )<<7; 1893129203Scognet bSig = ( bSig | 0x00800000 )<<8; 1894129203Scognet shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); 1895129203Scognet zSig = zSig64; 1896129203Scognet if ( 0 <= (sbits32) ( zSig<<1 ) ) { 1897129203Scognet zSig <<= 1; 1898129203Scognet --zExp; 1899129203Scognet } 1900129203Scognet return roundAndPackFloat32( zSign, zExp, zSig ); 1901129203Scognet 1902129203Scognet} 1903129203Scognet 1904129203Scognet/* 1905129203Scognet------------------------------------------------------------------------------- 1906129203ScognetReturns the result of dividing the single-precision floating-point value `a' 1907129203Scognetby the corresponding value `b'. The operation is performed according to the 1908129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 1909129203Scognet------------------------------------------------------------------------------- 1910129203Scognet*/ 1911129203Scognetfloat32 float32_div( float32 a, float32 b ) 1912129203Scognet{ 1913129203Scognet flag aSign, bSign, zSign; 1914129203Scognet int16 aExp, bExp, zExp; 1915129203Scognet bits32 aSig, bSig, zSig; 1916129203Scognet 1917129203Scognet aSig = extractFloat32Frac( a ); 1918129203Scognet aExp = extractFloat32Exp( a ); 1919129203Scognet aSign = extractFloat32Sign( a ); 1920129203Scognet bSig = extractFloat32Frac( b ); 1921129203Scognet bExp = extractFloat32Exp( b ); 1922129203Scognet bSign = extractFloat32Sign( b ); 1923129203Scognet zSign = aSign ^ bSign; 1924129203Scognet if ( aExp == 0xFF ) { 1925129203Scognet if ( aSig ) return propagateFloat32NaN( a, b ); 1926129203Scognet if ( bExp == 0xFF ) { 1927129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1928129203Scognet float_raise( float_flag_invalid ); 1929129203Scognet return float32_default_nan; 1930129203Scognet } 1931129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1932129203Scognet } 1933129203Scognet if ( bExp == 0xFF ) { 1934129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 1935129203Scognet return packFloat32( zSign, 0, 0 ); 1936129203Scognet } 1937129203Scognet if ( bExp == 0 ) { 1938129203Scognet if ( bSig == 0 ) { 1939129203Scognet if ( ( aExp | aSig ) == 0 ) { 1940129203Scognet float_raise( float_flag_invalid ); 1941129203Scognet return float32_default_nan; 1942129203Scognet } 1943129203Scognet float_raise( float_flag_divbyzero ); 1944129203Scognet return packFloat32( zSign, 0xFF, 0 ); 1945129203Scognet } 1946129203Scognet normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 1947129203Scognet } 1948129203Scognet if ( aExp == 0 ) { 1949129203Scognet if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 1950129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1951129203Scognet } 1952129203Scognet zExp = aExp - bExp + 0x7D; 1953129203Scognet aSig = ( aSig | 0x00800000 )<<7; 1954129203Scognet bSig = ( bSig | 0x00800000 )<<8; 1955129203Scognet if ( bSig <= ( aSig + aSig ) ) { 1956129203Scognet aSig >>= 1; 1957129203Scognet ++zExp; 1958129203Scognet } 1959129203Scognet zSig = ( ( (bits64) aSig )<<32 ) / bSig; 1960129203Scognet if ( ( zSig & 0x3F ) == 0 ) { 1961129203Scognet zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 ); 1962129203Scognet } 1963129203Scognet return roundAndPackFloat32( zSign, zExp, zSig ); 1964129203Scognet 1965129203Scognet} 1966129203Scognet 1967129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1968129203Scognet/* 1969129203Scognet------------------------------------------------------------------------------- 1970129203ScognetReturns the remainder of the single-precision floating-point value `a' 1971129203Scognetwith respect to the corresponding value `b'. The operation is performed 1972129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1973129203Scognet------------------------------------------------------------------------------- 1974129203Scognet*/ 1975129203Scognetfloat32 float32_rem( float32 a, float32 b ) 1976129203Scognet{ 1977129203Scognet flag aSign, bSign, zSign; 1978129203Scognet int16 aExp, bExp, expDiff; 1979129203Scognet bits32 aSig, bSig; 1980129203Scognet bits32 q; 1981129203Scognet bits64 aSig64, bSig64, q64; 1982129203Scognet bits32 alternateASig; 1983129203Scognet sbits32 sigMean; 1984129203Scognet 1985129203Scognet aSig = extractFloat32Frac( a ); 1986129203Scognet aExp = extractFloat32Exp( a ); 1987129203Scognet aSign = extractFloat32Sign( a ); 1988129203Scognet bSig = extractFloat32Frac( b ); 1989129203Scognet bExp = extractFloat32Exp( b ); 1990129203Scognet bSign = extractFloat32Sign( b ); 1991129203Scognet if ( aExp == 0xFF ) { 1992129203Scognet if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 1993129203Scognet return propagateFloat32NaN( a, b ); 1994129203Scognet } 1995129203Scognet float_raise( float_flag_invalid ); 1996129203Scognet return float32_default_nan; 1997129203Scognet } 1998129203Scognet if ( bExp == 0xFF ) { 1999129203Scognet if ( bSig ) return propagateFloat32NaN( a, b ); 2000129203Scognet return a; 2001129203Scognet } 2002129203Scognet if ( bExp == 0 ) { 2003129203Scognet if ( bSig == 0 ) { 2004129203Scognet float_raise( float_flag_invalid ); 2005129203Scognet return float32_default_nan; 2006129203Scognet } 2007129203Scognet normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 2008129203Scognet } 2009129203Scognet if ( aExp == 0 ) { 2010129203Scognet if ( aSig == 0 ) return a; 2011129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 2012129203Scognet } 2013129203Scognet expDiff = aExp - bExp; 2014129203Scognet aSig |= 0x00800000; 2015129203Scognet bSig |= 0x00800000; 2016129203Scognet if ( expDiff < 32 ) { 2017129203Scognet aSig <<= 8; 2018129203Scognet bSig <<= 8; 2019129203Scognet if ( expDiff < 0 ) { 2020129203Scognet if ( expDiff < -1 ) return a; 2021129203Scognet aSig >>= 1; 2022129203Scognet } 2023129203Scognet q = ( bSig <= aSig ); 2024129203Scognet if ( q ) aSig -= bSig; 2025129203Scognet if ( 0 < expDiff ) { 2026129203Scognet q = ( ( (bits64) aSig )<<32 ) / bSig; 2027129203Scognet q >>= 32 - expDiff; 2028129203Scognet bSig >>= 2; 2029129203Scognet aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 2030129203Scognet } 2031129203Scognet else { 2032129203Scognet aSig >>= 2; 2033129203Scognet bSig >>= 2; 2034129203Scognet } 2035129203Scognet } 2036129203Scognet else { 2037129203Scognet if ( bSig <= aSig ) aSig -= bSig; 2038129203Scognet aSig64 = ( (bits64) aSig )<<40; 2039129203Scognet bSig64 = ( (bits64) bSig )<<40; 2040129203Scognet expDiff -= 64; 2041129203Scognet while ( 0 < expDiff ) { 2042129203Scognet q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 2043129203Scognet q64 = ( 2 < q64 ) ? q64 - 2 : 0; 2044129203Scognet aSig64 = - ( ( bSig * q64 )<<38 ); 2045129203Scognet expDiff -= 62; 2046129203Scognet } 2047129203Scognet expDiff += 64; 2048129203Scognet q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 2049129203Scognet q64 = ( 2 < q64 ) ? q64 - 2 : 0; 2050129203Scognet q = q64>>( 64 - expDiff ); 2051129203Scognet bSig <<= 6; 2052129203Scognet aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; 2053129203Scognet } 2054129203Scognet do { 2055129203Scognet alternateASig = aSig; 2056129203Scognet ++q; 2057129203Scognet aSig -= bSig; 2058129203Scognet } while ( 0 <= (sbits32) aSig ); 2059129203Scognet sigMean = aSig + alternateASig; 2060129203Scognet if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 2061129203Scognet aSig = alternateASig; 2062129203Scognet } 2063129203Scognet zSign = ( (sbits32) aSig < 0 ); 2064129203Scognet if ( zSign ) aSig = - aSig; 2065129203Scognet return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); 2066129203Scognet 2067129203Scognet} 2068129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2069129203Scognet 2070129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2071129203Scognet/* 2072129203Scognet------------------------------------------------------------------------------- 2073129203ScognetReturns the square root of the single-precision floating-point value `a'. 2074129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary 2075129203ScognetFloating-Point Arithmetic. 2076129203Scognet------------------------------------------------------------------------------- 2077129203Scognet*/ 2078129203Scognetfloat32 float32_sqrt( float32 a ) 2079129203Scognet{ 2080129203Scognet flag aSign; 2081129203Scognet int16 aExp, zExp; 2082129203Scognet bits32 aSig, zSig; 2083129203Scognet bits64 rem, term; 2084129203Scognet 2085129203Scognet aSig = extractFloat32Frac( a ); 2086129203Scognet aExp = extractFloat32Exp( a ); 2087129203Scognet aSign = extractFloat32Sign( a ); 2088129203Scognet if ( aExp == 0xFF ) { 2089129203Scognet if ( aSig ) return propagateFloat32NaN( a, 0 ); 2090129203Scognet if ( ! aSign ) return a; 2091129203Scognet float_raise( float_flag_invalid ); 2092129203Scognet return float32_default_nan; 2093129203Scognet } 2094129203Scognet if ( aSign ) { 2095129203Scognet if ( ( aExp | aSig ) == 0 ) return a; 2096129203Scognet float_raise( float_flag_invalid ); 2097129203Scognet return float32_default_nan; 2098129203Scognet } 2099129203Scognet if ( aExp == 0 ) { 2100129203Scognet if ( aSig == 0 ) return 0; 2101129203Scognet normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 2102129203Scognet } 2103129203Scognet zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 2104129203Scognet aSig = ( aSig | 0x00800000 )<<8; 2105129203Scognet zSig = estimateSqrt32( aExp, aSig ) + 2; 2106129203Scognet if ( ( zSig & 0x7F ) <= 5 ) { 2107129203Scognet if ( zSig < 2 ) { 2108129203Scognet zSig = 0x7FFFFFFF; 2109129203Scognet goto roundAndPack; 2110129203Scognet } 2111129203Scognet aSig >>= aExp & 1; 2112129203Scognet term = ( (bits64) zSig ) * zSig; 2113129203Scognet rem = ( ( (bits64) aSig )<<32 ) - term; 2114129203Scognet while ( (sbits64) rem < 0 ) { 2115129203Scognet --zSig; 2116129203Scognet rem += ( ( (bits64) zSig )<<1 ) | 1; 2117129203Scognet } 2118129203Scognet zSig |= ( rem != 0 ); 2119129203Scognet } 2120129203Scognet shift32RightJamming( zSig, 1, &zSig ); 2121129203Scognet roundAndPack: 2122129203Scognet return roundAndPackFloat32( 0, zExp, zSig ); 2123129203Scognet 2124129203Scognet} 2125129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2126129203Scognet 2127129203Scognet/* 2128129203Scognet------------------------------------------------------------------------------- 2129129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to 2130129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 2131129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2132129203Scognet------------------------------------------------------------------------------- 2133129203Scognet*/ 2134129203Scognetflag float32_eq( float32 a, float32 b ) 2135129203Scognet{ 2136129203Scognet 2137129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2138129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2139129203Scognet ) { 2140129203Scognet if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2141129203Scognet float_raise( float_flag_invalid ); 2142129203Scognet } 2143129203Scognet return 0; 2144129203Scognet } 2145129203Scognet return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2146129203Scognet 2147129203Scognet} 2148129203Scognet 2149129203Scognet/* 2150129203Scognet------------------------------------------------------------------------------- 2151129203ScognetReturns 1 if the single-precision floating-point value `a' is less than 2152129203Scognetor equal to the corresponding value `b', and 0 otherwise. The comparison 2153129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 2154129203ScognetArithmetic. 2155129203Scognet------------------------------------------------------------------------------- 2156129203Scognet*/ 2157129203Scognetflag float32_le( float32 a, float32 b ) 2158129203Scognet{ 2159129203Scognet flag aSign, bSign; 2160129203Scognet 2161129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2162129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2163129203Scognet ) { 2164129203Scognet float_raise( float_flag_invalid ); 2165129203Scognet return 0; 2166129203Scognet } 2167129203Scognet aSign = extractFloat32Sign( a ); 2168129203Scognet bSign = extractFloat32Sign( b ); 2169129203Scognet if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2170129203Scognet return ( a == b ) || ( aSign ^ ( a < b ) ); 2171129203Scognet 2172129203Scognet} 2173129203Scognet 2174129203Scognet/* 2175129203Scognet------------------------------------------------------------------------------- 2176129203ScognetReturns 1 if the single-precision floating-point value `a' is less than 2177129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 2178129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2179129203Scognet------------------------------------------------------------------------------- 2180129203Scognet*/ 2181129203Scognetflag float32_lt( float32 a, float32 b ) 2182129203Scognet{ 2183129203Scognet flag aSign, bSign; 2184129203Scognet 2185129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2186129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2187129203Scognet ) { 2188129203Scognet float_raise( float_flag_invalid ); 2189129203Scognet return 0; 2190129203Scognet } 2191129203Scognet aSign = extractFloat32Sign( a ); 2192129203Scognet bSign = extractFloat32Sign( b ); 2193129203Scognet if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 2194129203Scognet return ( a != b ) && ( aSign ^ ( a < b ) ); 2195129203Scognet 2196129203Scognet} 2197129203Scognet 2198129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2199129203Scognet/* 2200129203Scognet------------------------------------------------------------------------------- 2201129203ScognetReturns 1 if the single-precision floating-point value `a' is equal to 2202129203Scognetthe corresponding value `b', and 0 otherwise. The invalid exception is 2203129203Scognetraised if either operand is a NaN. Otherwise, the comparison is performed 2204129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2205129203Scognet------------------------------------------------------------------------------- 2206129203Scognet*/ 2207129203Scognetflag float32_eq_signaling( float32 a, float32 b ) 2208129203Scognet{ 2209129203Scognet 2210129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2211129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2212129203Scognet ) { 2213129203Scognet float_raise( float_flag_invalid ); 2214129203Scognet return 0; 2215129203Scognet } 2216129203Scognet return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2217129203Scognet 2218129203Scognet} 2219129203Scognet 2220129203Scognet/* 2221129203Scognet------------------------------------------------------------------------------- 2222129203ScognetReturns 1 if the single-precision floating-point value `a' is less than or 2223129203Scognetequal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 2224129203Scognetcause an exception. Otherwise, the comparison is performed according to the 2225129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 2226129203Scognet------------------------------------------------------------------------------- 2227129203Scognet*/ 2228129203Scognetflag float32_le_quiet( float32 a, float32 b ) 2229129203Scognet{ 2230129203Scognet flag aSign, bSign; 2231129203Scognet 2232129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2233129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2234129203Scognet ) { 2235129203Scognet if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2236129203Scognet float_raise( float_flag_invalid ); 2237129203Scognet } 2238129203Scognet return 0; 2239129203Scognet } 2240129203Scognet aSign = extractFloat32Sign( a ); 2241129203Scognet bSign = extractFloat32Sign( b ); 2242129203Scognet if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 2243129203Scognet return ( a == b ) || ( aSign ^ ( a < b ) ); 2244129203Scognet 2245129203Scognet} 2246129203Scognet 2247129203Scognet/* 2248129203Scognet------------------------------------------------------------------------------- 2249129203ScognetReturns 1 if the single-precision floating-point value `a' is less than 2250129203Scognetthe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 2251129203Scognetexception. Otherwise, the comparison is performed according to the IEC/IEEE 2252129203ScognetStandard for Binary Floating-Point Arithmetic. 2253129203Scognet------------------------------------------------------------------------------- 2254129203Scognet*/ 2255129203Scognetflag float32_lt_quiet( float32 a, float32 b ) 2256129203Scognet{ 2257129203Scognet flag aSign, bSign; 2258129203Scognet 2259129203Scognet if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 2260129203Scognet || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 2261129203Scognet ) { 2262129203Scognet if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 2263129203Scognet float_raise( float_flag_invalid ); 2264129203Scognet } 2265129203Scognet return 0; 2266129203Scognet } 2267129203Scognet aSign = extractFloat32Sign( a ); 2268129203Scognet bSign = extractFloat32Sign( b ); 2269129203Scognet if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 2270129203Scognet return ( a != b ) && ( aSign ^ ( a < b ) ); 2271129203Scognet 2272129203Scognet} 2273129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2274129203Scognet 2275129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2276129203Scognet/* 2277129203Scognet------------------------------------------------------------------------------- 2278129203ScognetReturns the result of converting the double-precision floating-point value 2279129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 2280129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2281129203ScognetArithmetic---which means in particular that the conversion is rounded 2282129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 2283129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 2284129203Scognetlargest integer with the same sign as `a' is returned. 2285129203Scognet------------------------------------------------------------------------------- 2286129203Scognet*/ 2287129203Scognetint32 float64_to_int32( float64 a ) 2288129203Scognet{ 2289129203Scognet flag aSign; 2290129203Scognet int16 aExp, shiftCount; 2291129203Scognet bits64 aSig; 2292129203Scognet 2293129203Scognet aSig = extractFloat64Frac( a ); 2294129203Scognet aExp = extractFloat64Exp( a ); 2295129203Scognet aSign = extractFloat64Sign( a ); 2296129203Scognet if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 2297129203Scognet if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2298129203Scognet shiftCount = 0x42C - aExp; 2299129203Scognet if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); 2300129203Scognet return roundAndPackInt32( aSign, aSig ); 2301129203Scognet 2302129203Scognet} 2303129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2304129203Scognet 2305129203Scognet/* 2306129203Scognet------------------------------------------------------------------------------- 2307129203ScognetReturns the result of converting the double-precision floating-point value 2308129203Scognet`a' to the 32-bit two's complement integer format. The conversion is 2309129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2310129203ScognetArithmetic, except that the conversion is always rounded toward zero. 2311129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 2312129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 2313129203Scognetreturned. 2314129203Scognet------------------------------------------------------------------------------- 2315129203Scognet*/ 2316129203Scognetint32 float64_to_int32_round_to_zero( float64 a ) 2317129203Scognet{ 2318129203Scognet flag aSign; 2319129203Scognet int16 aExp, shiftCount; 2320129203Scognet bits64 aSig, savedASig; 2321129203Scognet int32 z; 2322129203Scognet 2323129203Scognet aSig = extractFloat64Frac( a ); 2324129203Scognet aExp = extractFloat64Exp( a ); 2325129203Scognet aSign = extractFloat64Sign( a ); 2326129203Scognet if ( 0x41E < aExp ) { 2327129203Scognet if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 2328129203Scognet goto invalid; 2329129203Scognet } 2330129203Scognet else if ( aExp < 0x3FF ) { 2331129203Scognet if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 2332129203Scognet return 0; 2333129203Scognet } 2334129203Scognet aSig |= LIT64( 0x0010000000000000 ); 2335129203Scognet shiftCount = 0x433 - aExp; 2336129203Scognet savedASig = aSig; 2337129203Scognet aSig >>= shiftCount; 2338129203Scognet z = aSig; 2339129203Scognet if ( aSign ) z = - z; 2340129203Scognet if ( ( z < 0 ) ^ aSign ) { 2341129203Scognet invalid: 2342129203Scognet float_raise( float_flag_invalid ); 2343129203Scognet return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 2344129203Scognet } 2345129203Scognet if ( ( aSig<<shiftCount ) != savedASig ) { 2346129203Scognet float_exception_flags |= float_flag_inexact; 2347129203Scognet } 2348129203Scognet return z; 2349129203Scognet 2350129203Scognet} 2351129203Scognet 2352129203Scognet#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 2353129203Scognet/* 2354129203Scognet------------------------------------------------------------------------------- 2355129203ScognetReturns the result of converting the double-precision floating-point value 2356129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 2357129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2358129203ScognetArithmetic---which means in particular that the conversion is rounded 2359129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 2360129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 2361129203Scognetlargest integer with the same sign as `a' is returned. 2362129203Scognet------------------------------------------------------------------------------- 2363129203Scognet*/ 2364129203Scognetint64 float64_to_int64( float64 a ) 2365129203Scognet{ 2366129203Scognet flag aSign; 2367129203Scognet int16 aExp, shiftCount; 2368129203Scognet bits64 aSig, aSigExtra; 2369129203Scognet 2370129203Scognet aSig = extractFloat64Frac( a ); 2371129203Scognet aExp = extractFloat64Exp( a ); 2372129203Scognet aSign = extractFloat64Sign( a ); 2373129203Scognet if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2374129203Scognet shiftCount = 0x433 - aExp; 2375129203Scognet if ( shiftCount <= 0 ) { 2376129203Scognet if ( 0x43E < aExp ) { 2377129203Scognet float_raise( float_flag_invalid ); 2378129203Scognet if ( ! aSign 2379129203Scognet || ( ( aExp == 0x7FF ) 2380129203Scognet && ( aSig != LIT64( 0x0010000000000000 ) ) ) 2381129203Scognet ) { 2382129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 2383129203Scognet } 2384129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 2385129203Scognet } 2386129203Scognet aSigExtra = 0; 2387129203Scognet aSig <<= - shiftCount; 2388129203Scognet } 2389129203Scognet else { 2390129203Scognet shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 2391129203Scognet } 2392129203Scognet return roundAndPackInt64( aSign, aSig, aSigExtra ); 2393129203Scognet 2394129203Scognet} 2395129203Scognet 2396129203Scognet/* 2397129203Scognet------------------------------------------------------------------------------- 2398129203ScognetReturns the result of converting the double-precision floating-point value 2399129203Scognet`a' to the 64-bit two's complement integer format. The conversion is 2400129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2401129203ScognetArithmetic, except that the conversion is always rounded toward zero. 2402129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 2403129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 2404129203Scognetreturned. 2405129203Scognet------------------------------------------------------------------------------- 2406129203Scognet*/ 2407129203Scognetint64 float64_to_int64_round_to_zero( float64 a ) 2408129203Scognet{ 2409129203Scognet flag aSign; 2410129203Scognet int16 aExp, shiftCount; 2411129203Scognet bits64 aSig; 2412129203Scognet int64 z; 2413129203Scognet 2414129203Scognet aSig = extractFloat64Frac( a ); 2415129203Scognet aExp = extractFloat64Exp( a ); 2416129203Scognet aSign = extractFloat64Sign( a ); 2417129203Scognet if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 2418129203Scognet shiftCount = aExp - 0x433; 2419129203Scognet if ( 0 <= shiftCount ) { 2420129203Scognet if ( 0x43E <= aExp ) { 2421129203Scognet if ( a != LIT64( 0xC3E0000000000000 ) ) { 2422129203Scognet float_raise( float_flag_invalid ); 2423129203Scognet if ( ! aSign 2424129203Scognet || ( ( aExp == 0x7FF ) 2425129203Scognet && ( aSig != LIT64( 0x0010000000000000 ) ) ) 2426129203Scognet ) { 2427129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 2428129203Scognet } 2429129203Scognet } 2430129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 2431129203Scognet } 2432129203Scognet z = aSig<<shiftCount; 2433129203Scognet } 2434129203Scognet else { 2435129203Scognet if ( aExp < 0x3FE ) { 2436129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 2437129203Scognet return 0; 2438129203Scognet } 2439129203Scognet z = aSig>>( - shiftCount ); 2440129203Scognet if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { 2441129203Scognet float_exception_flags |= float_flag_inexact; 2442129203Scognet } 2443129203Scognet } 2444129203Scognet if ( aSign ) z = - z; 2445129203Scognet return z; 2446129203Scognet 2447129203Scognet} 2448129203Scognet#endif /* !SOFTFLOAT_FOR_GCC */ 2449129203Scognet 2450129203Scognet/* 2451129203Scognet------------------------------------------------------------------------------- 2452129203ScognetReturns the result of converting the double-precision floating-point value 2453129203Scognet`a' to the single-precision floating-point format. The conversion is 2454129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2455129203ScognetArithmetic. 2456129203Scognet------------------------------------------------------------------------------- 2457129203Scognet*/ 2458129203Scognetfloat32 float64_to_float32( float64 a ) 2459129203Scognet{ 2460129203Scognet flag aSign; 2461129203Scognet int16 aExp; 2462129203Scognet bits64 aSig; 2463129203Scognet bits32 zSig; 2464129203Scognet 2465129203Scognet aSig = extractFloat64Frac( a ); 2466129203Scognet aExp = extractFloat64Exp( a ); 2467129203Scognet aSign = extractFloat64Sign( a ); 2468129203Scognet if ( aExp == 0x7FF ) { 2469129203Scognet if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); 2470129203Scognet return packFloat32( aSign, 0xFF, 0 ); 2471129203Scognet } 2472129203Scognet shift64RightJamming( aSig, 22, &aSig ); 2473129203Scognet zSig = aSig; 2474129203Scognet if ( aExp || zSig ) { 2475129203Scognet zSig |= 0x40000000; 2476129203Scognet aExp -= 0x381; 2477129203Scognet } 2478129203Scognet return roundAndPackFloat32( aSign, aExp, zSig ); 2479129203Scognet 2480129203Scognet} 2481129203Scognet 2482129203Scognet#ifdef FLOATX80 2483129203Scognet 2484129203Scognet/* 2485129203Scognet------------------------------------------------------------------------------- 2486129203ScognetReturns the result of converting the double-precision floating-point value 2487129203Scognet`a' to the extended double-precision floating-point format. The conversion 2488129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 2489129203ScognetArithmetic. 2490129203Scognet------------------------------------------------------------------------------- 2491129203Scognet*/ 2492129203Scognetfloatx80 float64_to_floatx80( float64 a ) 2493129203Scognet{ 2494129203Scognet flag aSign; 2495129203Scognet int16 aExp; 2496129203Scognet bits64 aSig; 2497129203Scognet 2498129203Scognet aSig = extractFloat64Frac( a ); 2499129203Scognet aExp = extractFloat64Exp( a ); 2500129203Scognet aSign = extractFloat64Sign( a ); 2501129203Scognet if ( aExp == 0x7FF ) { 2502129203Scognet if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); 2503129203Scognet return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 2504129203Scognet } 2505129203Scognet if ( aExp == 0 ) { 2506129203Scognet if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 2507129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2508129203Scognet } 2509129203Scognet return 2510129203Scognet packFloatx80( 2511129203Scognet aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); 2512129203Scognet 2513129203Scognet} 2514129203Scognet 2515129203Scognet#endif 2516129203Scognet 2517129203Scognet#ifdef FLOAT128 2518129203Scognet 2519129203Scognet/* 2520129203Scognet------------------------------------------------------------------------------- 2521129203ScognetReturns the result of converting the double-precision floating-point value 2522129203Scognet`a' to the quadruple-precision floating-point format. The conversion is 2523129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 2524129203ScognetArithmetic. 2525129203Scognet------------------------------------------------------------------------------- 2526129203Scognet*/ 2527129203Scognetfloat128 float64_to_float128( float64 a ) 2528129203Scognet{ 2529129203Scognet flag aSign; 2530129203Scognet int16 aExp; 2531129203Scognet bits64 aSig, zSig0, zSig1; 2532129203Scognet 2533129203Scognet aSig = extractFloat64Frac( a ); 2534129203Scognet aExp = extractFloat64Exp( a ); 2535129203Scognet aSign = extractFloat64Sign( a ); 2536129203Scognet if ( aExp == 0x7FF ) { 2537129203Scognet if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); 2538129203Scognet return packFloat128( aSign, 0x7FFF, 0, 0 ); 2539129203Scognet } 2540129203Scognet if ( aExp == 0 ) { 2541129203Scognet if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); 2542129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2543129203Scognet --aExp; 2544129203Scognet } 2545129203Scognet shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); 2546129203Scognet return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); 2547129203Scognet 2548129203Scognet} 2549129203Scognet 2550129203Scognet#endif 2551129203Scognet 2552129203Scognet#ifndef SOFTFLOAT_FOR_GCC 2553129203Scognet/* 2554129203Scognet------------------------------------------------------------------------------- 2555129203ScognetRounds the double-precision floating-point value `a' to an integer, and 2556129203Scognetreturns the result as a double-precision floating-point value. The 2557129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary 2558129203ScognetFloating-Point Arithmetic. 2559129203Scognet------------------------------------------------------------------------------- 2560129203Scognet*/ 2561129203Scognetfloat64 float64_round_to_int( float64 a ) 2562129203Scognet{ 2563129203Scognet flag aSign; 2564129203Scognet int16 aExp; 2565129203Scognet bits64 lastBitMask, roundBitsMask; 2566129203Scognet int8 roundingMode; 2567129203Scognet float64 z; 2568129203Scognet 2569129203Scognet aExp = extractFloat64Exp( a ); 2570129203Scognet if ( 0x433 <= aExp ) { 2571129203Scognet if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { 2572129203Scognet return propagateFloat64NaN( a, a ); 2573129203Scognet } 2574129203Scognet return a; 2575129203Scognet } 2576129203Scognet if ( aExp < 0x3FF ) { 2577129203Scognet if ( (bits64) ( a<<1 ) == 0 ) return a; 2578129203Scognet float_exception_flags |= float_flag_inexact; 2579129203Scognet aSign = extractFloat64Sign( a ); 2580129203Scognet switch ( float_rounding_mode ) { 2581129203Scognet case float_round_nearest_even: 2582129203Scognet if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { 2583129203Scognet return packFloat64( aSign, 0x3FF, 0 ); 2584129203Scognet } 2585129203Scognet break; 2586129203Scognet case float_round_to_zero: 2587129203Scognet break; 2588129203Scognet case float_round_down: 2589129203Scognet return aSign ? LIT64( 0xBFF0000000000000 ) : 0; 2590129203Scognet case float_round_up: 2591129203Scognet return 2592129203Scognet aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); 2593129203Scognet } 2594129203Scognet return packFloat64( aSign, 0, 0 ); 2595129203Scognet } 2596129203Scognet lastBitMask = 1; 2597129203Scognet lastBitMask <<= 0x433 - aExp; 2598129203Scognet roundBitsMask = lastBitMask - 1; 2599129203Scognet z = a; 2600129203Scognet roundingMode = float_rounding_mode; 2601129203Scognet if ( roundingMode == float_round_nearest_even ) { 2602129203Scognet z += lastBitMask>>1; 2603129203Scognet if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 2604129203Scognet } 2605129203Scognet else if ( roundingMode != float_round_to_zero ) { 2606129203Scognet if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { 2607129203Scognet z += roundBitsMask; 2608129203Scognet } 2609129203Scognet } 2610129203Scognet z &= ~ roundBitsMask; 2611129203Scognet if ( z != a ) float_exception_flags |= float_flag_inexact; 2612129203Scognet return z; 2613129203Scognet 2614129203Scognet} 2615129203Scognet#endif 2616129203Scognet 2617129203Scognet/* 2618129203Scognet------------------------------------------------------------------------------- 2619129203ScognetReturns the result of adding the absolute values of the double-precision 2620129203Scognetfloating-point values `a' and `b'. If `zSign' is 1, the sum is negated 2621129203Scognetbefore being returned. `zSign' is ignored if the result is a NaN. 2622129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 2623129203ScognetFloating-Point Arithmetic. 2624129203Scognet------------------------------------------------------------------------------- 2625129203Scognet*/ 2626129203Scognetstatic float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) 2627129203Scognet{ 2628129203Scognet int16 aExp, bExp, zExp; 2629129203Scognet bits64 aSig, bSig, zSig; 2630129203Scognet int16 expDiff; 2631129203Scognet 2632129203Scognet aSig = extractFloat64Frac( a ); 2633129203Scognet aExp = extractFloat64Exp( a ); 2634129203Scognet bSig = extractFloat64Frac( b ); 2635129203Scognet bExp = extractFloat64Exp( b ); 2636129203Scognet expDiff = aExp - bExp; 2637129203Scognet aSig <<= 9; 2638129203Scognet bSig <<= 9; 2639129203Scognet if ( 0 < expDiff ) { 2640129203Scognet if ( aExp == 0x7FF ) { 2641129203Scognet if ( aSig ) return propagateFloat64NaN( a, b ); 2642129203Scognet return a; 2643129203Scognet } 2644129203Scognet if ( bExp == 0 ) { 2645129203Scognet --expDiff; 2646129203Scognet } 2647129203Scognet else { 2648129203Scognet bSig |= LIT64( 0x2000000000000000 ); 2649129203Scognet } 2650129203Scognet shift64RightJamming( bSig, expDiff, &bSig ); 2651129203Scognet zExp = aExp; 2652129203Scognet } 2653129203Scognet else if ( expDiff < 0 ) { 2654129203Scognet if ( bExp == 0x7FF ) { 2655129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2656129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2657129203Scognet } 2658129203Scognet if ( aExp == 0 ) { 2659129203Scognet ++expDiff; 2660129203Scognet } 2661129203Scognet else { 2662129203Scognet aSig |= LIT64( 0x2000000000000000 ); 2663129203Scognet } 2664129203Scognet shift64RightJamming( aSig, - expDiff, &aSig ); 2665129203Scognet zExp = bExp; 2666129203Scognet } 2667129203Scognet else { 2668129203Scognet if ( aExp == 0x7FF ) { 2669129203Scognet if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 2670129203Scognet return a; 2671129203Scognet } 2672129203Scognet if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); 2673129203Scognet zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; 2674129203Scognet zExp = aExp; 2675129203Scognet goto roundAndPack; 2676129203Scognet } 2677129203Scognet aSig |= LIT64( 0x2000000000000000 ); 2678129203Scognet zSig = ( aSig + bSig )<<1; 2679129203Scognet --zExp; 2680129203Scognet if ( (sbits64) zSig < 0 ) { 2681129203Scognet zSig = aSig + bSig; 2682129203Scognet ++zExp; 2683129203Scognet } 2684129203Scognet roundAndPack: 2685129203Scognet return roundAndPackFloat64( zSign, zExp, zSig ); 2686129203Scognet 2687129203Scognet} 2688129203Scognet 2689129203Scognet/* 2690129203Scognet------------------------------------------------------------------------------- 2691129203ScognetReturns the result of subtracting the absolute values of the double- 2692129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the 2693129203Scognetdifference is negated before being returned. `zSign' is ignored if the 2694129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 2695129203ScognetStandard for Binary Floating-Point Arithmetic. 2696129203Scognet------------------------------------------------------------------------------- 2697129203Scognet*/ 2698129203Scognetstatic float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) 2699129203Scognet{ 2700129203Scognet int16 aExp, bExp, zExp; 2701129203Scognet bits64 aSig, bSig, zSig; 2702129203Scognet int16 expDiff; 2703129203Scognet 2704129203Scognet aSig = extractFloat64Frac( a ); 2705129203Scognet aExp = extractFloat64Exp( a ); 2706129203Scognet bSig = extractFloat64Frac( b ); 2707129203Scognet bExp = extractFloat64Exp( b ); 2708129203Scognet expDiff = aExp - bExp; 2709129203Scognet aSig <<= 10; 2710129203Scognet bSig <<= 10; 2711129203Scognet if ( 0 < expDiff ) goto aExpBigger; 2712129203Scognet if ( expDiff < 0 ) goto bExpBigger; 2713129203Scognet if ( aExp == 0x7FF ) { 2714129203Scognet if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 2715129203Scognet float_raise( float_flag_invalid ); 2716129203Scognet return float64_default_nan; 2717129203Scognet } 2718129203Scognet if ( aExp == 0 ) { 2719129203Scognet aExp = 1; 2720129203Scognet bExp = 1; 2721129203Scognet } 2722129203Scognet if ( bSig < aSig ) goto aBigger; 2723129203Scognet if ( aSig < bSig ) goto bBigger; 2724129203Scognet return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); 2725129203Scognet bExpBigger: 2726129203Scognet if ( bExp == 0x7FF ) { 2727129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2728129203Scognet return packFloat64( zSign ^ 1, 0x7FF, 0 ); 2729129203Scognet } 2730129203Scognet if ( aExp == 0 ) { 2731129203Scognet ++expDiff; 2732129203Scognet } 2733129203Scognet else { 2734129203Scognet aSig |= LIT64( 0x4000000000000000 ); 2735129203Scognet } 2736129203Scognet shift64RightJamming( aSig, - expDiff, &aSig ); 2737129203Scognet bSig |= LIT64( 0x4000000000000000 ); 2738129203Scognet bBigger: 2739129203Scognet zSig = bSig - aSig; 2740129203Scognet zExp = bExp; 2741129203Scognet zSign ^= 1; 2742129203Scognet goto normalizeRoundAndPack; 2743129203Scognet aExpBigger: 2744129203Scognet if ( aExp == 0x7FF ) { 2745129203Scognet if ( aSig ) return propagateFloat64NaN( a, b ); 2746129203Scognet return a; 2747129203Scognet } 2748129203Scognet if ( bExp == 0 ) { 2749129203Scognet --expDiff; 2750129203Scognet } 2751129203Scognet else { 2752129203Scognet bSig |= LIT64( 0x4000000000000000 ); 2753129203Scognet } 2754129203Scognet shift64RightJamming( bSig, expDiff, &bSig ); 2755129203Scognet aSig |= LIT64( 0x4000000000000000 ); 2756129203Scognet aBigger: 2757129203Scognet zSig = aSig - bSig; 2758129203Scognet zExp = aExp; 2759129203Scognet normalizeRoundAndPack: 2760129203Scognet --zExp; 2761129203Scognet return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); 2762129203Scognet 2763129203Scognet} 2764129203Scognet 2765129203Scognet/* 2766129203Scognet------------------------------------------------------------------------------- 2767129203ScognetReturns the result of adding the double-precision floating-point values `a' 2768129203Scognetand `b'. The operation is performed according to the IEC/IEEE Standard for 2769129203ScognetBinary Floating-Point Arithmetic. 2770129203Scognet------------------------------------------------------------------------------- 2771129203Scognet*/ 2772129203Scognetfloat64 float64_add( float64 a, float64 b ) 2773129203Scognet{ 2774129203Scognet flag aSign, bSign; 2775129203Scognet 2776129203Scognet aSign = extractFloat64Sign( a ); 2777129203Scognet bSign = extractFloat64Sign( b ); 2778129203Scognet if ( aSign == bSign ) { 2779129203Scognet return addFloat64Sigs( a, b, aSign ); 2780129203Scognet } 2781129203Scognet else { 2782129203Scognet return subFloat64Sigs( a, b, aSign ); 2783129203Scognet } 2784129203Scognet 2785129203Scognet} 2786129203Scognet 2787129203Scognet/* 2788129203Scognet------------------------------------------------------------------------------- 2789129203ScognetReturns the result of subtracting the double-precision floating-point values 2790129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 2791129203Scognetfor Binary Floating-Point Arithmetic. 2792129203Scognet------------------------------------------------------------------------------- 2793129203Scognet*/ 2794129203Scognetfloat64 float64_sub( float64 a, float64 b ) 2795129203Scognet{ 2796129203Scognet flag aSign, bSign; 2797129203Scognet 2798129203Scognet aSign = extractFloat64Sign( a ); 2799129203Scognet bSign = extractFloat64Sign( b ); 2800129203Scognet if ( aSign == bSign ) { 2801129203Scognet return subFloat64Sigs( a, b, aSign ); 2802129203Scognet } 2803129203Scognet else { 2804129203Scognet return addFloat64Sigs( a, b, aSign ); 2805129203Scognet } 2806129203Scognet 2807129203Scognet} 2808129203Scognet 2809129203Scognet/* 2810129203Scognet------------------------------------------------------------------------------- 2811129203ScognetReturns the result of multiplying the double-precision floating-point values 2812129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 2813129203Scognetfor Binary Floating-Point Arithmetic. 2814129203Scognet------------------------------------------------------------------------------- 2815129203Scognet*/ 2816129203Scognetfloat64 float64_mul( float64 a, float64 b ) 2817129203Scognet{ 2818129203Scognet flag aSign, bSign, zSign; 2819129203Scognet int16 aExp, bExp, zExp; 2820129203Scognet bits64 aSig, bSig, zSig0, zSig1; 2821129203Scognet 2822129203Scognet aSig = extractFloat64Frac( a ); 2823129203Scognet aExp = extractFloat64Exp( a ); 2824129203Scognet aSign = extractFloat64Sign( a ); 2825129203Scognet bSig = extractFloat64Frac( b ); 2826129203Scognet bExp = extractFloat64Exp( b ); 2827129203Scognet bSign = extractFloat64Sign( b ); 2828129203Scognet zSign = aSign ^ bSign; 2829129203Scognet if ( aExp == 0x7FF ) { 2830129203Scognet if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 2831129203Scognet return propagateFloat64NaN( a, b ); 2832129203Scognet } 2833129203Scognet if ( ( bExp | bSig ) == 0 ) { 2834129203Scognet float_raise( float_flag_invalid ); 2835129203Scognet return float64_default_nan; 2836129203Scognet } 2837129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2838129203Scognet } 2839129203Scognet if ( bExp == 0x7FF ) { 2840129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2841129203Scognet if ( ( aExp | aSig ) == 0 ) { 2842129203Scognet float_raise( float_flag_invalid ); 2843129203Scognet return float64_default_nan; 2844129203Scognet } 2845129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2846129203Scognet } 2847129203Scognet if ( aExp == 0 ) { 2848129203Scognet if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 2849129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2850129203Scognet } 2851129203Scognet if ( bExp == 0 ) { 2852129203Scognet if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); 2853129203Scognet normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2854129203Scognet } 2855129203Scognet zExp = aExp + bExp - 0x3FF; 2856129203Scognet aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 2857129203Scognet bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2858129203Scognet mul64To128( aSig, bSig, &zSig0, &zSig1 ); 2859129203Scognet zSig0 |= ( zSig1 != 0 ); 2860129203Scognet if ( 0 <= (sbits64) ( zSig0<<1 ) ) { 2861129203Scognet zSig0 <<= 1; 2862129203Scognet --zExp; 2863129203Scognet } 2864129203Scognet return roundAndPackFloat64( zSign, zExp, zSig0 ); 2865129203Scognet 2866129203Scognet} 2867129203Scognet 2868129203Scognet/* 2869129203Scognet------------------------------------------------------------------------------- 2870129203ScognetReturns the result of dividing the double-precision floating-point value `a' 2871129203Scognetby the corresponding value `b'. The operation is performed according to 2872129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2873129203Scognet------------------------------------------------------------------------------- 2874129203Scognet*/ 2875129203Scognetfloat64 float64_div( float64 a, float64 b ) 2876129203Scognet{ 2877129203Scognet flag aSign, bSign, zSign; 2878129203Scognet int16 aExp, bExp, zExp; 2879129203Scognet bits64 aSig, bSig, zSig; 2880129203Scognet bits64 rem0, rem1; 2881129203Scognet bits64 term0, term1; 2882129203Scognet 2883129203Scognet aSig = extractFloat64Frac( a ); 2884129203Scognet aExp = extractFloat64Exp( a ); 2885129203Scognet aSign = extractFloat64Sign( a ); 2886129203Scognet bSig = extractFloat64Frac( b ); 2887129203Scognet bExp = extractFloat64Exp( b ); 2888129203Scognet bSign = extractFloat64Sign( b ); 2889129203Scognet zSign = aSign ^ bSign; 2890129203Scognet if ( aExp == 0x7FF ) { 2891129203Scognet if ( aSig ) return propagateFloat64NaN( a, b ); 2892129203Scognet if ( bExp == 0x7FF ) { 2893129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2894129203Scognet float_raise( float_flag_invalid ); 2895129203Scognet return float64_default_nan; 2896129203Scognet } 2897129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2898129203Scognet } 2899129203Scognet if ( bExp == 0x7FF ) { 2900129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2901129203Scognet return packFloat64( zSign, 0, 0 ); 2902129203Scognet } 2903129203Scognet if ( bExp == 0 ) { 2904129203Scognet if ( bSig == 0 ) { 2905129203Scognet if ( ( aExp | aSig ) == 0 ) { 2906129203Scognet float_raise( float_flag_invalid ); 2907129203Scognet return float64_default_nan; 2908129203Scognet } 2909129203Scognet float_raise( float_flag_divbyzero ); 2910129203Scognet return packFloat64( zSign, 0x7FF, 0 ); 2911129203Scognet } 2912129203Scognet normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2913129203Scognet } 2914129203Scognet if ( aExp == 0 ) { 2915129203Scognet if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 2916129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2917129203Scognet } 2918129203Scognet zExp = aExp - bExp + 0x3FD; 2919129203Scognet aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 2920129203Scognet bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2921129203Scognet if ( bSig <= ( aSig + aSig ) ) { 2922129203Scognet aSig >>= 1; 2923129203Scognet ++zExp; 2924129203Scognet } 2925129203Scognet zSig = estimateDiv128To64( aSig, 0, bSig ); 2926129203Scognet if ( ( zSig & 0x1FF ) <= 2 ) { 2927129203Scognet mul64To128( bSig, zSig, &term0, &term1 ); 2928129203Scognet sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 2929129203Scognet while ( (sbits64) rem0 < 0 ) { 2930129203Scognet --zSig; 2931129203Scognet add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 2932129203Scognet } 2933129203Scognet zSig |= ( rem1 != 0 ); 2934129203Scognet } 2935129203Scognet return roundAndPackFloat64( zSign, zExp, zSig ); 2936129203Scognet 2937129203Scognet} 2938129203Scognet 2939129203Scognet#ifndef SOFTFLOAT_FOR_GCC 2940129203Scognet/* 2941129203Scognet------------------------------------------------------------------------------- 2942129203ScognetReturns the remainder of the double-precision floating-point value `a' 2943129203Scognetwith respect to the corresponding value `b'. The operation is performed 2944129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2945129203Scognet------------------------------------------------------------------------------- 2946129203Scognet*/ 2947129203Scognetfloat64 float64_rem( float64 a, float64 b ) 2948129203Scognet{ 2949129203Scognet flag aSign, bSign, zSign; 2950129203Scognet int16 aExp, bExp, expDiff; 2951129203Scognet bits64 aSig, bSig; 2952129203Scognet bits64 q, alternateASig; 2953129203Scognet sbits64 sigMean; 2954129203Scognet 2955129203Scognet aSig = extractFloat64Frac( a ); 2956129203Scognet aExp = extractFloat64Exp( a ); 2957129203Scognet aSign = extractFloat64Sign( a ); 2958129203Scognet bSig = extractFloat64Frac( b ); 2959129203Scognet bExp = extractFloat64Exp( b ); 2960129203Scognet bSign = extractFloat64Sign( b ); 2961129203Scognet if ( aExp == 0x7FF ) { 2962129203Scognet if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 2963129203Scognet return propagateFloat64NaN( a, b ); 2964129203Scognet } 2965129203Scognet float_raise( float_flag_invalid ); 2966129203Scognet return float64_default_nan; 2967129203Scognet } 2968129203Scognet if ( bExp == 0x7FF ) { 2969129203Scognet if ( bSig ) return propagateFloat64NaN( a, b ); 2970129203Scognet return a; 2971129203Scognet } 2972129203Scognet if ( bExp == 0 ) { 2973129203Scognet if ( bSig == 0 ) { 2974129203Scognet float_raise( float_flag_invalid ); 2975129203Scognet return float64_default_nan; 2976129203Scognet } 2977129203Scognet normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2978129203Scognet } 2979129203Scognet if ( aExp == 0 ) { 2980129203Scognet if ( aSig == 0 ) return a; 2981129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2982129203Scognet } 2983129203Scognet expDiff = aExp - bExp; 2984129203Scognet aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; 2985129203Scognet bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2986129203Scognet if ( expDiff < 0 ) { 2987129203Scognet if ( expDiff < -1 ) return a; 2988129203Scognet aSig >>= 1; 2989129203Scognet } 2990129203Scognet q = ( bSig <= aSig ); 2991129203Scognet if ( q ) aSig -= bSig; 2992129203Scognet expDiff -= 64; 2993129203Scognet while ( 0 < expDiff ) { 2994129203Scognet q = estimateDiv128To64( aSig, 0, bSig ); 2995129203Scognet q = ( 2 < q ) ? q - 2 : 0; 2996129203Scognet aSig = - ( ( bSig>>2 ) * q ); 2997129203Scognet expDiff -= 62; 2998129203Scognet } 2999129203Scognet expDiff += 64; 3000129203Scognet if ( 0 < expDiff ) { 3001129203Scognet q = estimateDiv128To64( aSig, 0, bSig ); 3002129203Scognet q = ( 2 < q ) ? q - 2 : 0; 3003129203Scognet q >>= 64 - expDiff; 3004129203Scognet bSig >>= 2; 3005129203Scognet aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 3006129203Scognet } 3007129203Scognet else { 3008129203Scognet aSig >>= 2; 3009129203Scognet bSig >>= 2; 3010129203Scognet } 3011129203Scognet do { 3012129203Scognet alternateASig = aSig; 3013129203Scognet ++q; 3014129203Scognet aSig -= bSig; 3015129203Scognet } while ( 0 <= (sbits64) aSig ); 3016129203Scognet sigMean = aSig + alternateASig; 3017129203Scognet if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 3018129203Scognet aSig = alternateASig; 3019129203Scognet } 3020129203Scognet zSign = ( (sbits64) aSig < 0 ); 3021129203Scognet if ( zSign ) aSig = - aSig; 3022129203Scognet return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); 3023129203Scognet 3024129203Scognet} 3025129203Scognet 3026129203Scognet/* 3027129203Scognet------------------------------------------------------------------------------- 3028129203ScognetReturns the square root of the double-precision floating-point value `a'. 3029129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary 3030129203ScognetFloating-Point Arithmetic. 3031129203Scognet------------------------------------------------------------------------------- 3032129203Scognet*/ 3033129203Scognetfloat64 float64_sqrt( float64 a ) 3034129203Scognet{ 3035129203Scognet flag aSign; 3036129203Scognet int16 aExp, zExp; 3037129203Scognet bits64 aSig, zSig, doubleZSig; 3038129203Scognet bits64 rem0, rem1, term0, term1; 3039129203Scognet 3040129203Scognet aSig = extractFloat64Frac( a ); 3041129203Scognet aExp = extractFloat64Exp( a ); 3042129203Scognet aSign = extractFloat64Sign( a ); 3043129203Scognet if ( aExp == 0x7FF ) { 3044129203Scognet if ( aSig ) return propagateFloat64NaN( a, a ); 3045129203Scognet if ( ! aSign ) return a; 3046129203Scognet float_raise( float_flag_invalid ); 3047129203Scognet return float64_default_nan; 3048129203Scognet } 3049129203Scognet if ( aSign ) { 3050129203Scognet if ( ( aExp | aSig ) == 0 ) return a; 3051129203Scognet float_raise( float_flag_invalid ); 3052129203Scognet return float64_default_nan; 3053129203Scognet } 3054129203Scognet if ( aExp == 0 ) { 3055129203Scognet if ( aSig == 0 ) return 0; 3056129203Scognet normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3057129203Scognet } 3058129203Scognet zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 3059129203Scognet aSig |= LIT64( 0x0010000000000000 ); 3060129203Scognet zSig = estimateSqrt32( aExp, aSig>>21 ); 3061129203Scognet aSig <<= 9 - ( aExp & 1 ); 3062129203Scognet zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); 3063129203Scognet if ( ( zSig & 0x1FF ) <= 5 ) { 3064129203Scognet doubleZSig = zSig<<1; 3065129203Scognet mul64To128( zSig, zSig, &term0, &term1 ); 3066129203Scognet sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 3067129203Scognet while ( (sbits64) rem0 < 0 ) { 3068129203Scognet --zSig; 3069129203Scognet doubleZSig -= 2; 3070129203Scognet add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); 3071129203Scognet } 3072129203Scognet zSig |= ( ( rem0 | rem1 ) != 0 ); 3073129203Scognet } 3074129203Scognet return roundAndPackFloat64( 0, zExp, zSig ); 3075129203Scognet 3076129203Scognet} 3077129203Scognet#endif 3078129203Scognet 3079129203Scognet/* 3080129203Scognet------------------------------------------------------------------------------- 3081129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the 3082129203Scognetcorresponding value `b', and 0 otherwise. The comparison is performed 3083129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3084129203Scognet------------------------------------------------------------------------------- 3085129203Scognet*/ 3086129203Scognetflag float64_eq( float64 a, float64 b ) 3087129203Scognet{ 3088129203Scognet 3089129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3090129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3091129203Scognet ) { 3092129203Scognet if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3093129203Scognet float_raise( float_flag_invalid ); 3094129203Scognet } 3095129203Scognet return 0; 3096129203Scognet } 3097129203Scognet return ( a == b ) || 3098129203Scognet ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 3099129203Scognet 3100129203Scognet} 3101129203Scognet 3102129203Scognet/* 3103129203Scognet------------------------------------------------------------------------------- 3104129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or 3105129203Scognetequal to the corresponding value `b', and 0 otherwise. The comparison is 3106129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 3107129203ScognetArithmetic. 3108129203Scognet------------------------------------------------------------------------------- 3109129203Scognet*/ 3110129203Scognetflag float64_le( float64 a, float64 b ) 3111129203Scognet{ 3112129203Scognet flag aSign, bSign; 3113129203Scognet 3114129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3115129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3116129203Scognet ) { 3117129203Scognet float_raise( float_flag_invalid ); 3118129203Scognet return 0; 3119129203Scognet } 3120129203Scognet aSign = extractFloat64Sign( a ); 3121129203Scognet bSign = extractFloat64Sign( b ); 3122129203Scognet if ( aSign != bSign ) 3123129203Scognet return aSign || 3124129203Scognet ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 3125129203Scognet 0 ); 3126129203Scognet return ( a == b ) || 3127129203Scognet ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3128129203Scognet 3129129203Scognet} 3130129203Scognet 3131129203Scognet/* 3132129203Scognet------------------------------------------------------------------------------- 3133129203ScognetReturns 1 if the double-precision floating-point value `a' is less than 3134129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 3135129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3136129203Scognet------------------------------------------------------------------------------- 3137129203Scognet*/ 3138129203Scognetflag float64_lt( float64 a, float64 b ) 3139129203Scognet{ 3140129203Scognet flag aSign, bSign; 3141129203Scognet 3142129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3143129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3144129203Scognet ) { 3145129203Scognet float_raise( float_flag_invalid ); 3146129203Scognet return 0; 3147129203Scognet } 3148129203Scognet aSign = extractFloat64Sign( a ); 3149129203Scognet bSign = extractFloat64Sign( b ); 3150129203Scognet if ( aSign != bSign ) 3151129203Scognet return aSign && 3152129203Scognet ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 3153129203Scognet 0 ); 3154129203Scognet return ( a != b ) && 3155129203Scognet ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3156129203Scognet 3157129203Scognet} 3158129203Scognet 3159129203Scognet#ifndef SOFTFLOAT_FOR_GCC 3160129203Scognet/* 3161129203Scognet------------------------------------------------------------------------------- 3162129203ScognetReturns 1 if the double-precision floating-point value `a' is equal to the 3163129203Scognetcorresponding value `b', and 0 otherwise. The invalid exception is raised 3164129203Scognetif either operand is a NaN. Otherwise, the comparison is performed 3165129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3166129203Scognet------------------------------------------------------------------------------- 3167129203Scognet*/ 3168129203Scognetflag float64_eq_signaling( float64 a, float64 b ) 3169129203Scognet{ 3170129203Scognet 3171129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3172129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3173129203Scognet ) { 3174129203Scognet float_raise( float_flag_invalid ); 3175129203Scognet return 0; 3176129203Scognet } 3177129203Scognet return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3178129203Scognet 3179129203Scognet} 3180129203Scognet 3181129203Scognet/* 3182129203Scognet------------------------------------------------------------------------------- 3183129203ScognetReturns 1 if the double-precision floating-point value `a' is less than or 3184129203Scognetequal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 3185129203Scognetcause an exception. Otherwise, the comparison is performed according to the 3186129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 3187129203Scognet------------------------------------------------------------------------------- 3188129203Scognet*/ 3189129203Scognetflag float64_le_quiet( float64 a, float64 b ) 3190129203Scognet{ 3191129203Scognet flag aSign, bSign; 3192129203Scognet 3193129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3194129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3195129203Scognet ) { 3196129203Scognet if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3197129203Scognet float_raise( float_flag_invalid ); 3198129203Scognet } 3199129203Scognet return 0; 3200129203Scognet } 3201129203Scognet aSign = extractFloat64Sign( a ); 3202129203Scognet bSign = extractFloat64Sign( b ); 3203129203Scognet if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3204129203Scognet return ( a == b ) || ( aSign ^ ( a < b ) ); 3205129203Scognet 3206129203Scognet} 3207129203Scognet 3208129203Scognet/* 3209129203Scognet------------------------------------------------------------------------------- 3210129203ScognetReturns 1 if the double-precision floating-point value `a' is less than 3211129203Scognetthe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 3212129203Scognetexception. Otherwise, the comparison is performed according to the IEC/IEEE 3213129203ScognetStandard for Binary Floating-Point Arithmetic. 3214129203Scognet------------------------------------------------------------------------------- 3215129203Scognet*/ 3216129203Scognetflag float64_lt_quiet( float64 a, float64 b ) 3217129203Scognet{ 3218129203Scognet flag aSign, bSign; 3219129203Scognet 3220129203Scognet if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3221129203Scognet || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3222129203Scognet ) { 3223129203Scognet if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3224129203Scognet float_raise( float_flag_invalid ); 3225129203Scognet } 3226129203Scognet return 0; 3227129203Scognet } 3228129203Scognet aSign = extractFloat64Sign( a ); 3229129203Scognet bSign = extractFloat64Sign( b ); 3230129203Scognet if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 3231129203Scognet return ( a != b ) && ( aSign ^ ( a < b ) ); 3232129203Scognet 3233129203Scognet} 3234129203Scognet#endif 3235129203Scognet 3236129203Scognet#ifdef FLOATX80 3237129203Scognet 3238129203Scognet/* 3239129203Scognet------------------------------------------------------------------------------- 3240129203ScognetReturns the result of converting the extended double-precision floating- 3241129203Scognetpoint value `a' to the 32-bit two's complement integer format. The 3242129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3243129203ScognetFloating-Point Arithmetic---which means in particular that the conversion 3244129203Scognetis rounded according to the current rounding mode. If `a' is a NaN, the 3245129203Scognetlargest positive integer is returned. Otherwise, if the conversion 3246129203Scognetoverflows, the largest integer with the same sign as `a' is returned. 3247129203Scognet------------------------------------------------------------------------------- 3248129203Scognet*/ 3249129203Scognetint32 floatx80_to_int32( floatx80 a ) 3250129203Scognet{ 3251129203Scognet flag aSign; 3252129203Scognet int32 aExp, shiftCount; 3253129203Scognet bits64 aSig; 3254129203Scognet 3255129203Scognet aSig = extractFloatx80Frac( a ); 3256129203Scognet aExp = extractFloatx80Exp( a ); 3257129203Scognet aSign = extractFloatx80Sign( a ); 3258129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3259129203Scognet shiftCount = 0x4037 - aExp; 3260129203Scognet if ( shiftCount <= 0 ) shiftCount = 1; 3261129203Scognet shift64RightJamming( aSig, shiftCount, &aSig ); 3262129203Scognet return roundAndPackInt32( aSign, aSig ); 3263129203Scognet 3264129203Scognet} 3265129203Scognet 3266129203Scognet/* 3267129203Scognet------------------------------------------------------------------------------- 3268129203ScognetReturns the result of converting the extended double-precision floating- 3269129203Scognetpoint value `a' to the 32-bit two's complement integer format. The 3270129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3271129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded 3272129203Scognettoward zero. If `a' is a NaN, the largest positive integer is returned. 3273129203ScognetOtherwise, if the conversion overflows, the largest integer with the same 3274129203Scognetsign as `a' is returned. 3275129203Scognet------------------------------------------------------------------------------- 3276129203Scognet*/ 3277129203Scognetint32 floatx80_to_int32_round_to_zero( floatx80 a ) 3278129203Scognet{ 3279129203Scognet flag aSign; 3280129203Scognet int32 aExp, shiftCount; 3281129203Scognet bits64 aSig, savedASig; 3282129203Scognet int32 z; 3283129203Scognet 3284129203Scognet aSig = extractFloatx80Frac( a ); 3285129203Scognet aExp = extractFloatx80Exp( a ); 3286129203Scognet aSign = extractFloatx80Sign( a ); 3287129203Scognet if ( 0x401E < aExp ) { 3288129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3289129203Scognet goto invalid; 3290129203Scognet } 3291129203Scognet else if ( aExp < 0x3FFF ) { 3292129203Scognet if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 3293129203Scognet return 0; 3294129203Scognet } 3295129203Scognet shiftCount = 0x403E - aExp; 3296129203Scognet savedASig = aSig; 3297129203Scognet aSig >>= shiftCount; 3298129203Scognet z = aSig; 3299129203Scognet if ( aSign ) z = - z; 3300129203Scognet if ( ( z < 0 ) ^ aSign ) { 3301129203Scognet invalid: 3302129203Scognet float_raise( float_flag_invalid ); 3303129203Scognet return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 3304129203Scognet } 3305129203Scognet if ( ( aSig<<shiftCount ) != savedASig ) { 3306129203Scognet float_exception_flags |= float_flag_inexact; 3307129203Scognet } 3308129203Scognet return z; 3309129203Scognet 3310129203Scognet} 3311129203Scognet 3312129203Scognet/* 3313129203Scognet------------------------------------------------------------------------------- 3314129203ScognetReturns the result of converting the extended double-precision floating- 3315129203Scognetpoint value `a' to the 64-bit two's complement integer format. The 3316129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3317129203ScognetFloating-Point Arithmetic---which means in particular that the conversion 3318129203Scognetis rounded according to the current rounding mode. If `a' is a NaN, 3319129203Scognetthe largest positive integer is returned. Otherwise, if the conversion 3320129203Scognetoverflows, the largest integer with the same sign as `a' is returned. 3321129203Scognet------------------------------------------------------------------------------- 3322129203Scognet*/ 3323129203Scognetint64 floatx80_to_int64( floatx80 a ) 3324129203Scognet{ 3325129203Scognet flag aSign; 3326129203Scognet int32 aExp, shiftCount; 3327129203Scognet bits64 aSig, aSigExtra; 3328129203Scognet 3329129203Scognet aSig = extractFloatx80Frac( a ); 3330129203Scognet aExp = extractFloatx80Exp( a ); 3331129203Scognet aSign = extractFloatx80Sign( a ); 3332129203Scognet shiftCount = 0x403E - aExp; 3333129203Scognet if ( shiftCount <= 0 ) { 3334129203Scognet if ( shiftCount ) { 3335129203Scognet float_raise( float_flag_invalid ); 3336129203Scognet if ( ! aSign 3337129203Scognet || ( ( aExp == 0x7FFF ) 3338129203Scognet && ( aSig != LIT64( 0x8000000000000000 ) ) ) 3339129203Scognet ) { 3340129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 3341129203Scognet } 3342129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 3343129203Scognet } 3344129203Scognet aSigExtra = 0; 3345129203Scognet } 3346129203Scognet else { 3347129203Scognet shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 3348129203Scognet } 3349129203Scognet return roundAndPackInt64( aSign, aSig, aSigExtra ); 3350129203Scognet 3351129203Scognet} 3352129203Scognet 3353129203Scognet/* 3354129203Scognet------------------------------------------------------------------------------- 3355129203ScognetReturns the result of converting the extended double-precision floating- 3356129203Scognetpoint value `a' to the 64-bit two's complement integer format. The 3357129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3358129203ScognetFloating-Point Arithmetic, except that the conversion is always rounded 3359129203Scognettoward zero. If `a' is a NaN, the largest positive integer is returned. 3360129203ScognetOtherwise, if the conversion overflows, the largest integer with the same 3361129203Scognetsign as `a' is returned. 3362129203Scognet------------------------------------------------------------------------------- 3363129203Scognet*/ 3364129203Scognetint64 floatx80_to_int64_round_to_zero( floatx80 a ) 3365129203Scognet{ 3366129203Scognet flag aSign; 3367129203Scognet int32 aExp, shiftCount; 3368129203Scognet bits64 aSig; 3369129203Scognet int64 z; 3370129203Scognet 3371129203Scognet aSig = extractFloatx80Frac( a ); 3372129203Scognet aExp = extractFloatx80Exp( a ); 3373129203Scognet aSign = extractFloatx80Sign( a ); 3374129203Scognet shiftCount = aExp - 0x403E; 3375129203Scognet if ( 0 <= shiftCount ) { 3376129203Scognet aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); 3377129203Scognet if ( ( a.high != 0xC03E ) || aSig ) { 3378129203Scognet float_raise( float_flag_invalid ); 3379129203Scognet if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { 3380129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 3381129203Scognet } 3382129203Scognet } 3383129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 3384129203Scognet } 3385129203Scognet else if ( aExp < 0x3FFF ) { 3386129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 3387129203Scognet return 0; 3388129203Scognet } 3389129203Scognet z = aSig>>( - shiftCount ); 3390129203Scognet if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { 3391129203Scognet float_exception_flags |= float_flag_inexact; 3392129203Scognet } 3393129203Scognet if ( aSign ) z = - z; 3394129203Scognet return z; 3395129203Scognet 3396129203Scognet} 3397129203Scognet 3398129203Scognet/* 3399129203Scognet------------------------------------------------------------------------------- 3400129203ScognetReturns the result of converting the extended double-precision floating- 3401129203Scognetpoint value `a' to the single-precision floating-point format. The 3402129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3403129203ScognetFloating-Point Arithmetic. 3404129203Scognet------------------------------------------------------------------------------- 3405129203Scognet*/ 3406129203Scognetfloat32 floatx80_to_float32( floatx80 a ) 3407129203Scognet{ 3408129203Scognet flag aSign; 3409129203Scognet int32 aExp; 3410129203Scognet bits64 aSig; 3411129203Scognet 3412129203Scognet aSig = extractFloatx80Frac( a ); 3413129203Scognet aExp = extractFloatx80Exp( a ); 3414129203Scognet aSign = extractFloatx80Sign( a ); 3415129203Scognet if ( aExp == 0x7FFF ) { 3416129203Scognet if ( (bits64) ( aSig<<1 ) ) { 3417129203Scognet return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); 3418129203Scognet } 3419129203Scognet return packFloat32( aSign, 0xFF, 0 ); 3420129203Scognet } 3421129203Scognet shift64RightJamming( aSig, 33, &aSig ); 3422129203Scognet if ( aExp || aSig ) aExp -= 0x3F81; 3423129203Scognet return roundAndPackFloat32( aSign, aExp, aSig ); 3424129203Scognet 3425129203Scognet} 3426129203Scognet 3427129203Scognet/* 3428129203Scognet------------------------------------------------------------------------------- 3429129203ScognetReturns the result of converting the extended double-precision floating- 3430129203Scognetpoint value `a' to the double-precision floating-point format. The 3431129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3432129203ScognetFloating-Point Arithmetic. 3433129203Scognet------------------------------------------------------------------------------- 3434129203Scognet*/ 3435129203Scognetfloat64 floatx80_to_float64( floatx80 a ) 3436129203Scognet{ 3437129203Scognet flag aSign; 3438129203Scognet int32 aExp; 3439129203Scognet bits64 aSig, zSig; 3440129203Scognet 3441129203Scognet aSig = extractFloatx80Frac( a ); 3442129203Scognet aExp = extractFloatx80Exp( a ); 3443129203Scognet aSign = extractFloatx80Sign( a ); 3444129203Scognet if ( aExp == 0x7FFF ) { 3445129203Scognet if ( (bits64) ( aSig<<1 ) ) { 3446129203Scognet return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); 3447129203Scognet } 3448129203Scognet return packFloat64( aSign, 0x7FF, 0 ); 3449129203Scognet } 3450129203Scognet shift64RightJamming( aSig, 1, &zSig ); 3451129203Scognet if ( aExp || aSig ) aExp -= 0x3C01; 3452129203Scognet return roundAndPackFloat64( aSign, aExp, zSig ); 3453129203Scognet 3454129203Scognet} 3455129203Scognet 3456129203Scognet#ifdef FLOAT128 3457129203Scognet 3458129203Scognet/* 3459129203Scognet------------------------------------------------------------------------------- 3460129203ScognetReturns the result of converting the extended double-precision floating- 3461129203Scognetpoint value `a' to the quadruple-precision floating-point format. The 3462129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 3463129203ScognetFloating-Point Arithmetic. 3464129203Scognet------------------------------------------------------------------------------- 3465129203Scognet*/ 3466129203Scognetfloat128 floatx80_to_float128( floatx80 a ) 3467129203Scognet{ 3468129203Scognet flag aSign; 3469129203Scognet int16 aExp; 3470129203Scognet bits64 aSig, zSig0, zSig1; 3471129203Scognet 3472129203Scognet aSig = extractFloatx80Frac( a ); 3473129203Scognet aExp = extractFloatx80Exp( a ); 3474129203Scognet aSign = extractFloatx80Sign( a ); 3475129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { 3476129203Scognet return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); 3477129203Scognet } 3478129203Scognet shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); 3479129203Scognet return packFloat128( aSign, aExp, zSig0, zSig1 ); 3480129203Scognet 3481129203Scognet} 3482129203Scognet 3483129203Scognet#endif 3484129203Scognet 3485129203Scognet/* 3486129203Scognet------------------------------------------------------------------------------- 3487129203ScognetRounds the extended double-precision floating-point value `a' to an integer, 3488129203Scognetand returns the result as an extended quadruple-precision floating-point 3489129203Scognetvalue. The operation is performed according to the IEC/IEEE Standard for 3490129203ScognetBinary Floating-Point Arithmetic. 3491129203Scognet------------------------------------------------------------------------------- 3492129203Scognet*/ 3493129203Scognetfloatx80 floatx80_round_to_int( floatx80 a ) 3494129203Scognet{ 3495129203Scognet flag aSign; 3496129203Scognet int32 aExp; 3497129203Scognet bits64 lastBitMask, roundBitsMask; 3498129203Scognet int8 roundingMode; 3499129203Scognet floatx80 z; 3500129203Scognet 3501129203Scognet aExp = extractFloatx80Exp( a ); 3502129203Scognet if ( 0x403E <= aExp ) { 3503129203Scognet if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { 3504129203Scognet return propagateFloatx80NaN( a, a ); 3505129203Scognet } 3506129203Scognet return a; 3507129203Scognet } 3508129203Scognet if ( aExp < 0x3FFF ) { 3509129203Scognet if ( ( aExp == 0 ) 3510129203Scognet && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { 3511129203Scognet return a; 3512129203Scognet } 3513129203Scognet float_exception_flags |= float_flag_inexact; 3514129203Scognet aSign = extractFloatx80Sign( a ); 3515129203Scognet switch ( float_rounding_mode ) { 3516129203Scognet case float_round_nearest_even: 3517129203Scognet if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) 3518129203Scognet ) { 3519129203Scognet return 3520129203Scognet packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3521129203Scognet } 3522129203Scognet break; 3523129203Scognet case float_round_to_zero: 3524129203Scognet break; 3525129203Scognet case float_round_down: 3526129203Scognet return 3527129203Scognet aSign ? 3528129203Scognet packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) 3529129203Scognet : packFloatx80( 0, 0, 0 ); 3530129203Scognet case float_round_up: 3531129203Scognet return 3532129203Scognet aSign ? packFloatx80( 1, 0, 0 ) 3533129203Scognet : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3534129203Scognet } 3535129203Scognet return packFloatx80( aSign, 0, 0 ); 3536129203Scognet } 3537129203Scognet lastBitMask = 1; 3538129203Scognet lastBitMask <<= 0x403E - aExp; 3539129203Scognet roundBitsMask = lastBitMask - 1; 3540129203Scognet z = a; 3541129203Scognet roundingMode = float_rounding_mode; 3542129203Scognet if ( roundingMode == float_round_nearest_even ) { 3543129203Scognet z.low += lastBitMask>>1; 3544129203Scognet if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 3545129203Scognet } 3546129203Scognet else if ( roundingMode != float_round_to_zero ) { 3547129203Scognet if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { 3548129203Scognet z.low += roundBitsMask; 3549129203Scognet } 3550129203Scognet } 3551129203Scognet z.low &= ~ roundBitsMask; 3552129203Scognet if ( z.low == 0 ) { 3553129203Scognet ++z.high; 3554129203Scognet z.low = LIT64( 0x8000000000000000 ); 3555129203Scognet } 3556129203Scognet if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; 3557129203Scognet return z; 3558129203Scognet 3559129203Scognet} 3560129203Scognet 3561129203Scognet/* 3562129203Scognet------------------------------------------------------------------------------- 3563129203ScognetReturns the result of adding the absolute values of the extended double- 3564129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the sum is 3565129203Scognetnegated before being returned. `zSign' is ignored if the result is a NaN. 3566129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 3567129203ScognetFloating-Point Arithmetic. 3568129203Scognet------------------------------------------------------------------------------- 3569129203Scognet*/ 3570129203Scognetstatic floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3571129203Scognet{ 3572129203Scognet int32 aExp, bExp, zExp; 3573129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3574129203Scognet int32 expDiff; 3575129203Scognet 3576129203Scognet aSig = extractFloatx80Frac( a ); 3577129203Scognet aExp = extractFloatx80Exp( a ); 3578129203Scognet bSig = extractFloatx80Frac( b ); 3579129203Scognet bExp = extractFloatx80Exp( b ); 3580129203Scognet expDiff = aExp - bExp; 3581129203Scognet if ( 0 < expDiff ) { 3582129203Scognet if ( aExp == 0x7FFF ) { 3583129203Scognet if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3584129203Scognet return a; 3585129203Scognet } 3586129203Scognet if ( bExp == 0 ) --expDiff; 3587129203Scognet shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3588129203Scognet zExp = aExp; 3589129203Scognet } 3590129203Scognet else if ( expDiff < 0 ) { 3591129203Scognet if ( bExp == 0x7FFF ) { 3592129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3593129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3594129203Scognet } 3595129203Scognet if ( aExp == 0 ) ++expDiff; 3596129203Scognet shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3597129203Scognet zExp = bExp; 3598129203Scognet } 3599129203Scognet else { 3600129203Scognet if ( aExp == 0x7FFF ) { 3601129203Scognet if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3602129203Scognet return propagateFloatx80NaN( a, b ); 3603129203Scognet } 3604129203Scognet return a; 3605129203Scognet } 3606129203Scognet zSig1 = 0; 3607129203Scognet zSig0 = aSig + bSig; 3608129203Scognet if ( aExp == 0 ) { 3609129203Scognet normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); 3610129203Scognet goto roundAndPack; 3611129203Scognet } 3612129203Scognet zExp = aExp; 3613129203Scognet goto shiftRight1; 3614129203Scognet } 3615129203Scognet zSig0 = aSig + bSig; 3616129203Scognet if ( (sbits64) zSig0 < 0 ) goto roundAndPack; 3617129203Scognet shiftRight1: 3618129203Scognet shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3619129203Scognet zSig0 |= LIT64( 0x8000000000000000 ); 3620129203Scognet ++zExp; 3621129203Scognet roundAndPack: 3622129203Scognet return 3623129203Scognet roundAndPackFloatx80( 3624129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3625129203Scognet 3626129203Scognet} 3627129203Scognet 3628129203Scognet/* 3629129203Scognet------------------------------------------------------------------------------- 3630129203ScognetReturns the result of subtracting the absolute values of the extended 3631129203Scognetdouble-precision floating-point values `a' and `b'. If `zSign' is 1, the 3632129203Scognetdifference is negated before being returned. `zSign' is ignored if the 3633129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 3634129203ScognetStandard for Binary Floating-Point Arithmetic. 3635129203Scognet------------------------------------------------------------------------------- 3636129203Scognet*/ 3637129203Scognetstatic floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3638129203Scognet{ 3639129203Scognet int32 aExp, bExp, zExp; 3640129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3641129203Scognet int32 expDiff; 3642129203Scognet floatx80 z; 3643129203Scognet 3644129203Scognet aSig = extractFloatx80Frac( a ); 3645129203Scognet aExp = extractFloatx80Exp( a ); 3646129203Scognet bSig = extractFloatx80Frac( b ); 3647129203Scognet bExp = extractFloatx80Exp( b ); 3648129203Scognet expDiff = aExp - bExp; 3649129203Scognet if ( 0 < expDiff ) goto aExpBigger; 3650129203Scognet if ( expDiff < 0 ) goto bExpBigger; 3651129203Scognet if ( aExp == 0x7FFF ) { 3652129203Scognet if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3653129203Scognet return propagateFloatx80NaN( a, b ); 3654129203Scognet } 3655129203Scognet float_raise( float_flag_invalid ); 3656129203Scognet z.low = floatx80_default_nan_low; 3657129203Scognet z.high = floatx80_default_nan_high; 3658129203Scognet return z; 3659129203Scognet } 3660129203Scognet if ( aExp == 0 ) { 3661129203Scognet aExp = 1; 3662129203Scognet bExp = 1; 3663129203Scognet } 3664129203Scognet zSig1 = 0; 3665129203Scognet if ( bSig < aSig ) goto aBigger; 3666129203Scognet if ( aSig < bSig ) goto bBigger; 3667129203Scognet return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); 3668129203Scognet bExpBigger: 3669129203Scognet if ( bExp == 0x7FFF ) { 3670129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3671129203Scognet return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3672129203Scognet } 3673129203Scognet if ( aExp == 0 ) ++expDiff; 3674129203Scognet shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3675129203Scognet bBigger: 3676129203Scognet sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); 3677129203Scognet zExp = bExp; 3678129203Scognet zSign ^= 1; 3679129203Scognet goto normalizeRoundAndPack; 3680129203Scognet aExpBigger: 3681129203Scognet if ( aExp == 0x7FFF ) { 3682129203Scognet if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3683129203Scognet return a; 3684129203Scognet } 3685129203Scognet if ( bExp == 0 ) --expDiff; 3686129203Scognet shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3687129203Scognet aBigger: 3688129203Scognet sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); 3689129203Scognet zExp = aExp; 3690129203Scognet normalizeRoundAndPack: 3691129203Scognet return 3692129203Scognet normalizeRoundAndPackFloatx80( 3693129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3694129203Scognet 3695129203Scognet} 3696129203Scognet 3697129203Scognet/* 3698129203Scognet------------------------------------------------------------------------------- 3699129203ScognetReturns the result of adding the extended double-precision floating-point 3700129203Scognetvalues `a' and `b'. The operation is performed according to the IEC/IEEE 3701129203ScognetStandard for Binary Floating-Point Arithmetic. 3702129203Scognet------------------------------------------------------------------------------- 3703129203Scognet*/ 3704129203Scognetfloatx80 floatx80_add( floatx80 a, floatx80 b ) 3705129203Scognet{ 3706129203Scognet flag aSign, bSign; 3707129203Scognet 3708129203Scognet aSign = extractFloatx80Sign( a ); 3709129203Scognet bSign = extractFloatx80Sign( b ); 3710129203Scognet if ( aSign == bSign ) { 3711129203Scognet return addFloatx80Sigs( a, b, aSign ); 3712129203Scognet } 3713129203Scognet else { 3714129203Scognet return subFloatx80Sigs( a, b, aSign ); 3715129203Scognet } 3716129203Scognet 3717129203Scognet} 3718129203Scognet 3719129203Scognet/* 3720129203Scognet------------------------------------------------------------------------------- 3721129203ScognetReturns the result of subtracting the extended double-precision floating- 3722129203Scognetpoint values `a' and `b'. The operation is performed according to the 3723129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 3724129203Scognet------------------------------------------------------------------------------- 3725129203Scognet*/ 3726129203Scognetfloatx80 floatx80_sub( floatx80 a, floatx80 b ) 3727129203Scognet{ 3728129203Scognet flag aSign, bSign; 3729129203Scognet 3730129203Scognet aSign = extractFloatx80Sign( a ); 3731129203Scognet bSign = extractFloatx80Sign( b ); 3732129203Scognet if ( aSign == bSign ) { 3733129203Scognet return subFloatx80Sigs( a, b, aSign ); 3734129203Scognet } 3735129203Scognet else { 3736129203Scognet return addFloatx80Sigs( a, b, aSign ); 3737129203Scognet } 3738129203Scognet 3739129203Scognet} 3740129203Scognet 3741129203Scognet/* 3742129203Scognet------------------------------------------------------------------------------- 3743129203ScognetReturns the result of multiplying the extended double-precision floating- 3744129203Scognetpoint values `a' and `b'. The operation is performed according to the 3745129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 3746129203Scognet------------------------------------------------------------------------------- 3747129203Scognet*/ 3748129203Scognetfloatx80 floatx80_mul( floatx80 a, floatx80 b ) 3749129203Scognet{ 3750129203Scognet flag aSign, bSign, zSign; 3751129203Scognet int32 aExp, bExp, zExp; 3752129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3753129203Scognet floatx80 z; 3754129203Scognet 3755129203Scognet aSig = extractFloatx80Frac( a ); 3756129203Scognet aExp = extractFloatx80Exp( a ); 3757129203Scognet aSign = extractFloatx80Sign( a ); 3758129203Scognet bSig = extractFloatx80Frac( b ); 3759129203Scognet bExp = extractFloatx80Exp( b ); 3760129203Scognet bSign = extractFloatx80Sign( b ); 3761129203Scognet zSign = aSign ^ bSign; 3762129203Scognet if ( aExp == 0x7FFF ) { 3763129203Scognet if ( (bits64) ( aSig<<1 ) 3764129203Scognet || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3765129203Scognet return propagateFloatx80NaN( a, b ); 3766129203Scognet } 3767129203Scognet if ( ( bExp | bSig ) == 0 ) goto invalid; 3768129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3769129203Scognet } 3770129203Scognet if ( bExp == 0x7FFF ) { 3771129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3772129203Scognet if ( ( aExp | aSig ) == 0 ) { 3773129203Scognet invalid: 3774129203Scognet float_raise( float_flag_invalid ); 3775129203Scognet z.low = floatx80_default_nan_low; 3776129203Scognet z.high = floatx80_default_nan_high; 3777129203Scognet return z; 3778129203Scognet } 3779129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3780129203Scognet } 3781129203Scognet if ( aExp == 0 ) { 3782129203Scognet if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3783129203Scognet normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3784129203Scognet } 3785129203Scognet if ( bExp == 0 ) { 3786129203Scognet if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3787129203Scognet normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3788129203Scognet } 3789129203Scognet zExp = aExp + bExp - 0x3FFE; 3790129203Scognet mul64To128( aSig, bSig, &zSig0, &zSig1 ); 3791129203Scognet if ( 0 < (sbits64) zSig0 ) { 3792129203Scognet shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3793129203Scognet --zExp; 3794129203Scognet } 3795129203Scognet return 3796129203Scognet roundAndPackFloatx80( 3797129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3798129203Scognet 3799129203Scognet} 3800129203Scognet 3801129203Scognet/* 3802129203Scognet------------------------------------------------------------------------------- 3803129203ScognetReturns the result of dividing the extended double-precision floating-point 3804129203Scognetvalue `a' by the corresponding value `b'. The operation is performed 3805129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3806129203Scognet------------------------------------------------------------------------------- 3807129203Scognet*/ 3808129203Scognetfloatx80 floatx80_div( floatx80 a, floatx80 b ) 3809129203Scognet{ 3810129203Scognet flag aSign, bSign, zSign; 3811129203Scognet int32 aExp, bExp, zExp; 3812129203Scognet bits64 aSig, bSig, zSig0, zSig1; 3813129203Scognet bits64 rem0, rem1, rem2, term0, term1, term2; 3814129203Scognet floatx80 z; 3815129203Scognet 3816129203Scognet aSig = extractFloatx80Frac( a ); 3817129203Scognet aExp = extractFloatx80Exp( a ); 3818129203Scognet aSign = extractFloatx80Sign( a ); 3819129203Scognet bSig = extractFloatx80Frac( b ); 3820129203Scognet bExp = extractFloatx80Exp( b ); 3821129203Scognet bSign = extractFloatx80Sign( b ); 3822129203Scognet zSign = aSign ^ bSign; 3823129203Scognet if ( aExp == 0x7FFF ) { 3824129203Scognet if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3825129203Scognet if ( bExp == 0x7FFF ) { 3826129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3827129203Scognet goto invalid; 3828129203Scognet } 3829129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3830129203Scognet } 3831129203Scognet if ( bExp == 0x7FFF ) { 3832129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3833129203Scognet return packFloatx80( zSign, 0, 0 ); 3834129203Scognet } 3835129203Scognet if ( bExp == 0 ) { 3836129203Scognet if ( bSig == 0 ) { 3837129203Scognet if ( ( aExp | aSig ) == 0 ) { 3838129203Scognet invalid: 3839129203Scognet float_raise( float_flag_invalid ); 3840129203Scognet z.low = floatx80_default_nan_low; 3841129203Scognet z.high = floatx80_default_nan_high; 3842129203Scognet return z; 3843129203Scognet } 3844129203Scognet float_raise( float_flag_divbyzero ); 3845129203Scognet return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3846129203Scognet } 3847129203Scognet normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3848129203Scognet } 3849129203Scognet if ( aExp == 0 ) { 3850129203Scognet if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3851129203Scognet normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3852129203Scognet } 3853129203Scognet zExp = aExp - bExp + 0x3FFE; 3854129203Scognet rem1 = 0; 3855129203Scognet if ( bSig <= aSig ) { 3856129203Scognet shift128Right( aSig, 0, 1, &aSig, &rem1 ); 3857129203Scognet ++zExp; 3858129203Scognet } 3859129203Scognet zSig0 = estimateDiv128To64( aSig, rem1, bSig ); 3860129203Scognet mul64To128( bSig, zSig0, &term0, &term1 ); 3861129203Scognet sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); 3862129203Scognet while ( (sbits64) rem0 < 0 ) { 3863129203Scognet --zSig0; 3864129203Scognet add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 3865129203Scognet } 3866129203Scognet zSig1 = estimateDiv128To64( rem1, 0, bSig ); 3867129203Scognet if ( (bits64) ( zSig1<<1 ) <= 8 ) { 3868129203Scognet mul64To128( bSig, zSig1, &term1, &term2 ); 3869129203Scognet sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 3870129203Scognet while ( (sbits64) rem1 < 0 ) { 3871129203Scognet --zSig1; 3872129203Scognet add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); 3873129203Scognet } 3874129203Scognet zSig1 |= ( ( rem1 | rem2 ) != 0 ); 3875129203Scognet } 3876129203Scognet return 3877129203Scognet roundAndPackFloatx80( 3878129203Scognet floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3879129203Scognet 3880129203Scognet} 3881129203Scognet 3882129203Scognet/* 3883129203Scognet------------------------------------------------------------------------------- 3884129203ScognetReturns the remainder of the extended double-precision floating-point value 3885129203Scognet`a' with respect to the corresponding value `b'. The operation is performed 3886129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3887129203Scognet------------------------------------------------------------------------------- 3888129203Scognet*/ 3889129203Scognetfloatx80 floatx80_rem( floatx80 a, floatx80 b ) 3890129203Scognet{ 3891129203Scognet flag aSign, bSign, zSign; 3892129203Scognet int32 aExp, bExp, expDiff; 3893129203Scognet bits64 aSig0, aSig1, bSig; 3894129203Scognet bits64 q, term0, term1, alternateASig0, alternateASig1; 3895129203Scognet floatx80 z; 3896129203Scognet 3897129203Scognet aSig0 = extractFloatx80Frac( a ); 3898129203Scognet aExp = extractFloatx80Exp( a ); 3899129203Scognet aSign = extractFloatx80Sign( a ); 3900129203Scognet bSig = extractFloatx80Frac( b ); 3901129203Scognet bExp = extractFloatx80Exp( b ); 3902129203Scognet bSign = extractFloatx80Sign( b ); 3903129203Scognet if ( aExp == 0x7FFF ) { 3904129203Scognet if ( (bits64) ( aSig0<<1 ) 3905129203Scognet || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3906129203Scognet return propagateFloatx80NaN( a, b ); 3907129203Scognet } 3908129203Scognet goto invalid; 3909129203Scognet } 3910129203Scognet if ( bExp == 0x7FFF ) { 3911129203Scognet if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3912129203Scognet return a; 3913129203Scognet } 3914129203Scognet if ( bExp == 0 ) { 3915129203Scognet if ( bSig == 0 ) { 3916129203Scognet invalid: 3917129203Scognet float_raise( float_flag_invalid ); 3918129203Scognet z.low = floatx80_default_nan_low; 3919129203Scognet z.high = floatx80_default_nan_high; 3920129203Scognet return z; 3921129203Scognet } 3922129203Scognet normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3923129203Scognet } 3924129203Scognet if ( aExp == 0 ) { 3925129203Scognet if ( (bits64) ( aSig0<<1 ) == 0 ) return a; 3926129203Scognet normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 3927129203Scognet } 3928129203Scognet bSig |= LIT64( 0x8000000000000000 ); 3929129203Scognet zSign = aSign; 3930129203Scognet expDiff = aExp - bExp; 3931129203Scognet aSig1 = 0; 3932129203Scognet if ( expDiff < 0 ) { 3933129203Scognet if ( expDiff < -1 ) return a; 3934129203Scognet shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); 3935129203Scognet expDiff = 0; 3936129203Scognet } 3937129203Scognet q = ( bSig <= aSig0 ); 3938129203Scognet if ( q ) aSig0 -= bSig; 3939129203Scognet expDiff -= 64; 3940129203Scognet while ( 0 < expDiff ) { 3941129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig ); 3942129203Scognet q = ( 2 < q ) ? q - 2 : 0; 3943129203Scognet mul64To128( bSig, q, &term0, &term1 ); 3944129203Scognet sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 3945129203Scognet shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); 3946129203Scognet expDiff -= 62; 3947129203Scognet } 3948129203Scognet expDiff += 64; 3949129203Scognet if ( 0 < expDiff ) { 3950129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig ); 3951129203Scognet q = ( 2 < q ) ? q - 2 : 0; 3952129203Scognet q >>= 64 - expDiff; 3953129203Scognet mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); 3954129203Scognet sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 3955129203Scognet shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); 3956129203Scognet while ( le128( term0, term1, aSig0, aSig1 ) ) { 3957129203Scognet ++q; 3958129203Scognet sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 3959129203Scognet } 3960129203Scognet } 3961129203Scognet else { 3962129203Scognet term1 = 0; 3963129203Scognet term0 = bSig; 3964129203Scognet } 3965129203Scognet sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); 3966129203Scognet if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) 3967129203Scognet || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) 3968129203Scognet && ( q & 1 ) ) 3969129203Scognet ) { 3970129203Scognet aSig0 = alternateASig0; 3971129203Scognet aSig1 = alternateASig1; 3972129203Scognet zSign = ! zSign; 3973129203Scognet } 3974129203Scognet return 3975129203Scognet normalizeRoundAndPackFloatx80( 3976129203Scognet 80, zSign, bExp + expDiff, aSig0, aSig1 ); 3977129203Scognet 3978129203Scognet} 3979129203Scognet 3980129203Scognet/* 3981129203Scognet------------------------------------------------------------------------------- 3982129203ScognetReturns the square root of the extended double-precision floating-point 3983129203Scognetvalue `a'. The operation is performed according to the IEC/IEEE Standard 3984129203Scognetfor Binary Floating-Point Arithmetic. 3985129203Scognet------------------------------------------------------------------------------- 3986129203Scognet*/ 3987129203Scognetfloatx80 floatx80_sqrt( floatx80 a ) 3988129203Scognet{ 3989129203Scognet flag aSign; 3990129203Scognet int32 aExp, zExp; 3991129203Scognet bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; 3992129203Scognet bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 3993129203Scognet floatx80 z; 3994129203Scognet 3995129203Scognet aSig0 = extractFloatx80Frac( a ); 3996129203Scognet aExp = extractFloatx80Exp( a ); 3997129203Scognet aSign = extractFloatx80Sign( a ); 3998129203Scognet if ( aExp == 0x7FFF ) { 3999129203Scognet if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); 4000129203Scognet if ( ! aSign ) return a; 4001129203Scognet goto invalid; 4002129203Scognet } 4003129203Scognet if ( aSign ) { 4004129203Scognet if ( ( aExp | aSig0 ) == 0 ) return a; 4005129203Scognet invalid: 4006129203Scognet float_raise( float_flag_invalid ); 4007129203Scognet z.low = floatx80_default_nan_low; 4008129203Scognet z.high = floatx80_default_nan_high; 4009129203Scognet return z; 4010129203Scognet } 4011129203Scognet if ( aExp == 0 ) { 4012129203Scognet if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); 4013129203Scognet normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 4014129203Scognet } 4015129203Scognet zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; 4016129203Scognet zSig0 = estimateSqrt32( aExp, aSig0>>32 ); 4017129203Scognet shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); 4018129203Scognet zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 4019129203Scognet doubleZSig0 = zSig0<<1; 4020129203Scognet mul64To128( zSig0, zSig0, &term0, &term1 ); 4021129203Scognet sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 4022129203Scognet while ( (sbits64) rem0 < 0 ) { 4023129203Scognet --zSig0; 4024129203Scognet doubleZSig0 -= 2; 4025129203Scognet add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 4026129203Scognet } 4027129203Scognet zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 4028129203Scognet if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { 4029129203Scognet if ( zSig1 == 0 ) zSig1 = 1; 4030129203Scognet mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 4031129203Scognet sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 4032129203Scognet mul64To128( zSig1, zSig1, &term2, &term3 ); 4033129203Scognet sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 4034129203Scognet while ( (sbits64) rem1 < 0 ) { 4035129203Scognet --zSig1; 4036129203Scognet shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 4037129203Scognet term3 |= 1; 4038129203Scognet term2 |= doubleZSig0; 4039129203Scognet add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 4040129203Scognet } 4041129203Scognet zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 4042129203Scognet } 4043129203Scognet shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); 4044129203Scognet zSig0 |= doubleZSig0; 4045129203Scognet return 4046129203Scognet roundAndPackFloatx80( 4047129203Scognet floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); 4048129203Scognet 4049129203Scognet} 4050129203Scognet 4051129203Scognet/* 4052129203Scognet------------------------------------------------------------------------------- 4053129203ScognetReturns 1 if the extended double-precision floating-point value `a' is 4054129203Scognetequal to the corresponding value `b', and 0 otherwise. The comparison is 4055129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-Point 4056129203ScognetArithmetic. 4057129203Scognet------------------------------------------------------------------------------- 4058129203Scognet*/ 4059129203Scognetflag floatx80_eq( floatx80 a, floatx80 b ) 4060129203Scognet{ 4061129203Scognet 4062129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4063129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4064129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4065129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4066129203Scognet ) { 4067129203Scognet if ( floatx80_is_signaling_nan( a ) 4068129203Scognet || floatx80_is_signaling_nan( b ) ) { 4069129203Scognet float_raise( float_flag_invalid ); 4070129203Scognet } 4071129203Scognet return 0; 4072129203Scognet } 4073129203Scognet return 4074129203Scognet ( a.low == b.low ) 4075129203Scognet && ( ( a.high == b.high ) 4076129203Scognet || ( ( a.low == 0 ) 4077129203Scognet && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 4078129203Scognet ); 4079129203Scognet 4080129203Scognet} 4081129203Scognet 4082129203Scognet/* 4083129203Scognet------------------------------------------------------------------------------- 4084129203ScognetReturns 1 if the extended double-precision floating-point value `a' is 4085129203Scognetless than or equal to the corresponding value `b', and 0 otherwise. The 4086129203Scognetcomparison is performed according to the IEC/IEEE Standard for Binary 4087129203ScognetFloating-Point Arithmetic. 4088129203Scognet------------------------------------------------------------------------------- 4089129203Scognet*/ 4090129203Scognetflag floatx80_le( floatx80 a, floatx80 b ) 4091129203Scognet{ 4092129203Scognet flag aSign, bSign; 4093129203Scognet 4094129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4095129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4096129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4097129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4098129203Scognet ) { 4099129203Scognet float_raise( float_flag_invalid ); 4100129203Scognet return 0; 4101129203Scognet } 4102129203Scognet aSign = extractFloatx80Sign( a ); 4103129203Scognet bSign = extractFloatx80Sign( b ); 4104129203Scognet if ( aSign != bSign ) { 4105129203Scognet return 4106129203Scognet aSign 4107129203Scognet || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4108129203Scognet == 0 ); 4109129203Scognet } 4110129203Scognet return 4111129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 4112129203Scognet : le128( a.high, a.low, b.high, b.low ); 4113129203Scognet 4114129203Scognet} 4115129203Scognet 4116129203Scognet/* 4117129203Scognet------------------------------------------------------------------------------- 4118129203ScognetReturns 1 if the extended double-precision floating-point value `a' is 4119129203Scognetless than the corresponding value `b', and 0 otherwise. The comparison 4120129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4121129203ScognetArithmetic. 4122129203Scognet------------------------------------------------------------------------------- 4123129203Scognet*/ 4124129203Scognetflag floatx80_lt( floatx80 a, floatx80 b ) 4125129203Scognet{ 4126129203Scognet flag aSign, bSign; 4127129203Scognet 4128129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4129129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4130129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4131129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4132129203Scognet ) { 4133129203Scognet float_raise( float_flag_invalid ); 4134129203Scognet return 0; 4135129203Scognet } 4136129203Scognet aSign = extractFloatx80Sign( a ); 4137129203Scognet bSign = extractFloatx80Sign( b ); 4138129203Scognet if ( aSign != bSign ) { 4139129203Scognet return 4140129203Scognet aSign 4141129203Scognet && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4142129203Scognet != 0 ); 4143129203Scognet } 4144129203Scognet return 4145129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 4146129203Scognet : lt128( a.high, a.low, b.high, b.low ); 4147129203Scognet 4148129203Scognet} 4149129203Scognet 4150129203Scognet/* 4151129203Scognet------------------------------------------------------------------------------- 4152129203ScognetReturns 1 if the extended double-precision floating-point value `a' is equal 4153129203Scognetto the corresponding value `b', and 0 otherwise. The invalid exception is 4154129203Scognetraised if either operand is a NaN. Otherwise, the comparison is performed 4155129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4156129203Scognet------------------------------------------------------------------------------- 4157129203Scognet*/ 4158129203Scognetflag floatx80_eq_signaling( floatx80 a, floatx80 b ) 4159129203Scognet{ 4160129203Scognet 4161129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4162129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4163129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4164129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4165129203Scognet ) { 4166129203Scognet float_raise( float_flag_invalid ); 4167129203Scognet return 0; 4168129203Scognet } 4169129203Scognet return 4170129203Scognet ( a.low == b.low ) 4171129203Scognet && ( ( a.high == b.high ) 4172129203Scognet || ( ( a.low == 0 ) 4173129203Scognet && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 4174129203Scognet ); 4175129203Scognet 4176129203Scognet} 4177129203Scognet 4178129203Scognet/* 4179129203Scognet------------------------------------------------------------------------------- 4180129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less 4181129203Scognetthan or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs 4182129203Scognetdo not cause an exception. Otherwise, the comparison is performed according 4183129203Scognetto the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4184129203Scognet------------------------------------------------------------------------------- 4185129203Scognet*/ 4186129203Scognetflag floatx80_le_quiet( floatx80 a, floatx80 b ) 4187129203Scognet{ 4188129203Scognet flag aSign, bSign; 4189129203Scognet 4190129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4191129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4192129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4193129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4194129203Scognet ) { 4195129203Scognet if ( floatx80_is_signaling_nan( a ) 4196129203Scognet || floatx80_is_signaling_nan( b ) ) { 4197129203Scognet float_raise( float_flag_invalid ); 4198129203Scognet } 4199129203Scognet return 0; 4200129203Scognet } 4201129203Scognet aSign = extractFloatx80Sign( a ); 4202129203Scognet bSign = extractFloatx80Sign( b ); 4203129203Scognet if ( aSign != bSign ) { 4204129203Scognet return 4205129203Scognet aSign 4206129203Scognet || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4207129203Scognet == 0 ); 4208129203Scognet } 4209129203Scognet return 4210129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 4211129203Scognet : le128( a.high, a.low, b.high, b.low ); 4212129203Scognet 4213129203Scognet} 4214129203Scognet 4215129203Scognet/* 4216129203Scognet------------------------------------------------------------------------------- 4217129203ScognetReturns 1 if the extended double-precision floating-point value `a' is less 4218129203Scognetthan the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause 4219129203Scognetan exception. Otherwise, the comparison is performed according to the 4220129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 4221129203Scognet------------------------------------------------------------------------------- 4222129203Scognet*/ 4223129203Scognetflag floatx80_lt_quiet( floatx80 a, floatx80 b ) 4224129203Scognet{ 4225129203Scognet flag aSign, bSign; 4226129203Scognet 4227129203Scognet if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4228129203Scognet && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4229129203Scognet || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4230129203Scognet && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4231129203Scognet ) { 4232129203Scognet if ( floatx80_is_signaling_nan( a ) 4233129203Scognet || floatx80_is_signaling_nan( b ) ) { 4234129203Scognet float_raise( float_flag_invalid ); 4235129203Scognet } 4236129203Scognet return 0; 4237129203Scognet } 4238129203Scognet aSign = extractFloatx80Sign( a ); 4239129203Scognet bSign = extractFloatx80Sign( b ); 4240129203Scognet if ( aSign != bSign ) { 4241129203Scognet return 4242129203Scognet aSign 4243129203Scognet && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4244129203Scognet != 0 ); 4245129203Scognet } 4246129203Scognet return 4247129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 4248129203Scognet : lt128( a.high, a.low, b.high, b.low ); 4249129203Scognet 4250129203Scognet} 4251129203Scognet 4252129203Scognet#endif 4253129203Scognet 4254129203Scognet#ifdef FLOAT128 4255129203Scognet 4256129203Scognet/* 4257129203Scognet------------------------------------------------------------------------------- 4258129203ScognetReturns the result of converting the quadruple-precision floating-point 4259129203Scognetvalue `a' to the 32-bit two's complement integer format. The conversion 4260129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4261129203ScognetArithmetic---which means in particular that the conversion is rounded 4262129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 4263129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 4264129203Scognetlargest integer with the same sign as `a' is returned. 4265129203Scognet------------------------------------------------------------------------------- 4266129203Scognet*/ 4267129203Scognetint32 float128_to_int32( float128 a ) 4268129203Scognet{ 4269129203Scognet flag aSign; 4270129203Scognet int32 aExp, shiftCount; 4271129203Scognet bits64 aSig0, aSig1; 4272129203Scognet 4273129203Scognet aSig1 = extractFloat128Frac1( a ); 4274129203Scognet aSig0 = extractFloat128Frac0( a ); 4275129203Scognet aExp = extractFloat128Exp( a ); 4276129203Scognet aSign = extractFloat128Sign( a ); 4277129203Scognet if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; 4278129203Scognet if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4279129203Scognet aSig0 |= ( aSig1 != 0 ); 4280129203Scognet shiftCount = 0x4028 - aExp; 4281129203Scognet if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); 4282129203Scognet return roundAndPackInt32( aSign, aSig0 ); 4283129203Scognet 4284129203Scognet} 4285129203Scognet 4286129203Scognet/* 4287129203Scognet------------------------------------------------------------------------------- 4288129203ScognetReturns the result of converting the quadruple-precision floating-point 4289129203Scognetvalue `a' to the 32-bit two's complement integer format. The conversion 4290129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4291129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 4292129203Scognet`a' is a NaN, the largest positive integer is returned. Otherwise, if the 4293129203Scognetconversion overflows, the largest integer with the same sign as `a' is 4294129203Scognetreturned. 4295129203Scognet------------------------------------------------------------------------------- 4296129203Scognet*/ 4297129203Scognetint32 float128_to_int32_round_to_zero( float128 a ) 4298129203Scognet{ 4299129203Scognet flag aSign; 4300129203Scognet int32 aExp, shiftCount; 4301129203Scognet bits64 aSig0, aSig1, savedASig; 4302129203Scognet int32 z; 4303129203Scognet 4304129203Scognet aSig1 = extractFloat128Frac1( a ); 4305129203Scognet aSig0 = extractFloat128Frac0( a ); 4306129203Scognet aExp = extractFloat128Exp( a ); 4307129203Scognet aSign = extractFloat128Sign( a ); 4308129203Scognet aSig0 |= ( aSig1 != 0 ); 4309129203Scognet if ( 0x401E < aExp ) { 4310129203Scognet if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; 4311129203Scognet goto invalid; 4312129203Scognet } 4313129203Scognet else if ( aExp < 0x3FFF ) { 4314129203Scognet if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact; 4315129203Scognet return 0; 4316129203Scognet } 4317129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4318129203Scognet shiftCount = 0x402F - aExp; 4319129203Scognet savedASig = aSig0; 4320129203Scognet aSig0 >>= shiftCount; 4321129203Scognet z = aSig0; 4322129203Scognet if ( aSign ) z = - z; 4323129203Scognet if ( ( z < 0 ) ^ aSign ) { 4324129203Scognet invalid: 4325129203Scognet float_raise( float_flag_invalid ); 4326129203Scognet return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 4327129203Scognet } 4328129203Scognet if ( ( aSig0<<shiftCount ) != savedASig ) { 4329129203Scognet float_exception_flags |= float_flag_inexact; 4330129203Scognet } 4331129203Scognet return z; 4332129203Scognet 4333129203Scognet} 4334129203Scognet 4335129203Scognet/* 4336129203Scognet------------------------------------------------------------------------------- 4337129203ScognetReturns the result of converting the quadruple-precision floating-point 4338129203Scognetvalue `a' to the 64-bit two's complement integer format. The conversion 4339129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4340129203ScognetArithmetic---which means in particular that the conversion is rounded 4341129203Scognetaccording to the current rounding mode. If `a' is a NaN, the largest 4342129203Scognetpositive integer is returned. Otherwise, if the conversion overflows, the 4343129203Scognetlargest integer with the same sign as `a' is returned. 4344129203Scognet------------------------------------------------------------------------------- 4345129203Scognet*/ 4346129203Scognetint64 float128_to_int64( float128 a ) 4347129203Scognet{ 4348129203Scognet flag aSign; 4349129203Scognet int32 aExp, shiftCount; 4350129203Scognet bits64 aSig0, aSig1; 4351129203Scognet 4352129203Scognet aSig1 = extractFloat128Frac1( a ); 4353129203Scognet aSig0 = extractFloat128Frac0( a ); 4354129203Scognet aExp = extractFloat128Exp( a ); 4355129203Scognet aSign = extractFloat128Sign( a ); 4356129203Scognet if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4357129203Scognet shiftCount = 0x402F - aExp; 4358129203Scognet if ( shiftCount <= 0 ) { 4359129203Scognet if ( 0x403E < aExp ) { 4360129203Scognet float_raise( float_flag_invalid ); 4361129203Scognet if ( ! aSign 4362129203Scognet || ( ( aExp == 0x7FFF ) 4363129203Scognet && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) 4364129203Scognet ) 4365129203Scognet ) { 4366129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 4367129203Scognet } 4368129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 4369129203Scognet } 4370129203Scognet shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); 4371129203Scognet } 4372129203Scognet else { 4373129203Scognet shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); 4374129203Scognet } 4375129203Scognet return roundAndPackInt64( aSign, aSig0, aSig1 ); 4376129203Scognet 4377129203Scognet} 4378129203Scognet 4379129203Scognet/* 4380129203Scognet------------------------------------------------------------------------------- 4381129203ScognetReturns the result of converting the quadruple-precision floating-point 4382129203Scognetvalue `a' to the 64-bit two's complement integer format. The conversion 4383129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4384129203ScognetArithmetic, except that the conversion is always rounded toward zero. 4385129203ScognetIf `a' is a NaN, the largest positive integer is returned. Otherwise, if 4386129203Scognetthe conversion overflows, the largest integer with the same sign as `a' is 4387129203Scognetreturned. 4388129203Scognet------------------------------------------------------------------------------- 4389129203Scognet*/ 4390129203Scognetint64 float128_to_int64_round_to_zero( float128 a ) 4391129203Scognet{ 4392129203Scognet flag aSign; 4393129203Scognet int32 aExp, shiftCount; 4394129203Scognet bits64 aSig0, aSig1; 4395129203Scognet int64 z; 4396129203Scognet 4397129203Scognet aSig1 = extractFloat128Frac1( a ); 4398129203Scognet aSig0 = extractFloat128Frac0( a ); 4399129203Scognet aExp = extractFloat128Exp( a ); 4400129203Scognet aSign = extractFloat128Sign( a ); 4401129203Scognet if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4402129203Scognet shiftCount = aExp - 0x402F; 4403129203Scognet if ( 0 < shiftCount ) { 4404129203Scognet if ( 0x403E <= aExp ) { 4405129203Scognet aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); 4406129203Scognet if ( ( a.high == LIT64( 0xC03E000000000000 ) ) 4407129203Scognet && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { 4408129203Scognet if ( aSig1 ) float_exception_flags |= float_flag_inexact; 4409129203Scognet } 4410129203Scognet else { 4411129203Scognet float_raise( float_flag_invalid ); 4412129203Scognet if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { 4413129203Scognet return LIT64( 0x7FFFFFFFFFFFFFFF ); 4414129203Scognet } 4415129203Scognet } 4416129203Scognet return (sbits64) LIT64( 0x8000000000000000 ); 4417129203Scognet } 4418129203Scognet z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); 4419129203Scognet if ( (bits64) ( aSig1<<shiftCount ) ) { 4420129203Scognet float_exception_flags |= float_flag_inexact; 4421129203Scognet } 4422129203Scognet } 4423129203Scognet else { 4424129203Scognet if ( aExp < 0x3FFF ) { 4425129203Scognet if ( aExp | aSig0 | aSig1 ) { 4426129203Scognet float_exception_flags |= float_flag_inexact; 4427129203Scognet } 4428129203Scognet return 0; 4429129203Scognet } 4430129203Scognet z = aSig0>>( - shiftCount ); 4431129203Scognet if ( aSig1 4432129203Scognet || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { 4433129203Scognet float_exception_flags |= float_flag_inexact; 4434129203Scognet } 4435129203Scognet } 4436129203Scognet if ( aSign ) z = - z; 4437129203Scognet return z; 4438129203Scognet 4439129203Scognet} 4440129203Scognet 4441129203Scognet/* 4442129203Scognet------------------------------------------------------------------------------- 4443129203ScognetReturns the result of converting the quadruple-precision floating-point 4444129203Scognetvalue `a' to the single-precision floating-point format. The conversion 4445129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4446129203ScognetArithmetic. 4447129203Scognet------------------------------------------------------------------------------- 4448129203Scognet*/ 4449129203Scognetfloat32 float128_to_float32( float128 a ) 4450129203Scognet{ 4451129203Scognet flag aSign; 4452129203Scognet int32 aExp; 4453129203Scognet bits64 aSig0, aSig1; 4454129203Scognet bits32 zSig; 4455129203Scognet 4456129203Scognet aSig1 = extractFloat128Frac1( a ); 4457129203Scognet aSig0 = extractFloat128Frac0( a ); 4458129203Scognet aExp = extractFloat128Exp( a ); 4459129203Scognet aSign = extractFloat128Sign( a ); 4460129203Scognet if ( aExp == 0x7FFF ) { 4461129203Scognet if ( aSig0 | aSig1 ) { 4462129203Scognet return commonNaNToFloat32( float128ToCommonNaN( a ) ); 4463129203Scognet } 4464129203Scognet return packFloat32( aSign, 0xFF, 0 ); 4465129203Scognet } 4466129203Scognet aSig0 |= ( aSig1 != 0 ); 4467129203Scognet shift64RightJamming( aSig0, 18, &aSig0 ); 4468129203Scognet zSig = aSig0; 4469129203Scognet if ( aExp || zSig ) { 4470129203Scognet zSig |= 0x40000000; 4471129203Scognet aExp -= 0x3F81; 4472129203Scognet } 4473129203Scognet return roundAndPackFloat32( aSign, aExp, zSig ); 4474129203Scognet 4475129203Scognet} 4476129203Scognet 4477129203Scognet/* 4478129203Scognet------------------------------------------------------------------------------- 4479129203ScognetReturns the result of converting the quadruple-precision floating-point 4480129203Scognetvalue `a' to the double-precision floating-point format. The conversion 4481129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 4482129203ScognetArithmetic. 4483129203Scognet------------------------------------------------------------------------------- 4484129203Scognet*/ 4485129203Scognetfloat64 float128_to_float64( float128 a ) 4486129203Scognet{ 4487129203Scognet flag aSign; 4488129203Scognet int32 aExp; 4489129203Scognet bits64 aSig0, aSig1; 4490129203Scognet 4491129203Scognet aSig1 = extractFloat128Frac1( a ); 4492129203Scognet aSig0 = extractFloat128Frac0( a ); 4493129203Scognet aExp = extractFloat128Exp( a ); 4494129203Scognet aSign = extractFloat128Sign( a ); 4495129203Scognet if ( aExp == 0x7FFF ) { 4496129203Scognet if ( aSig0 | aSig1 ) { 4497129203Scognet return commonNaNToFloat64( float128ToCommonNaN( a ) ); 4498129203Scognet } 4499129203Scognet return packFloat64( aSign, 0x7FF, 0 ); 4500129203Scognet } 4501129203Scognet shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4502129203Scognet aSig0 |= ( aSig1 != 0 ); 4503129203Scognet if ( aExp || aSig0 ) { 4504129203Scognet aSig0 |= LIT64( 0x4000000000000000 ); 4505129203Scognet aExp -= 0x3C01; 4506129203Scognet } 4507129203Scognet return roundAndPackFloat64( aSign, aExp, aSig0 ); 4508129203Scognet 4509129203Scognet} 4510129203Scognet 4511129203Scognet#ifdef FLOATX80 4512129203Scognet 4513129203Scognet/* 4514129203Scognet------------------------------------------------------------------------------- 4515129203ScognetReturns the result of converting the quadruple-precision floating-point 4516129203Scognetvalue `a' to the extended double-precision floating-point format. The 4517129203Scognetconversion is performed according to the IEC/IEEE Standard for Binary 4518129203ScognetFloating-Point Arithmetic. 4519129203Scognet------------------------------------------------------------------------------- 4520129203Scognet*/ 4521129203Scognetfloatx80 float128_to_floatx80( float128 a ) 4522129203Scognet{ 4523129203Scognet flag aSign; 4524129203Scognet int32 aExp; 4525129203Scognet bits64 aSig0, aSig1; 4526129203Scognet 4527129203Scognet aSig1 = extractFloat128Frac1( a ); 4528129203Scognet aSig0 = extractFloat128Frac0( a ); 4529129203Scognet aExp = extractFloat128Exp( a ); 4530129203Scognet aSign = extractFloat128Sign( a ); 4531129203Scognet if ( aExp == 0x7FFF ) { 4532129203Scognet if ( aSig0 | aSig1 ) { 4533129203Scognet return commonNaNToFloatx80( float128ToCommonNaN( a ) ); 4534129203Scognet } 4535129203Scognet return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 4536129203Scognet } 4537129203Scognet if ( aExp == 0 ) { 4538129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); 4539129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 4540129203Scognet } 4541129203Scognet else { 4542129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4543129203Scognet } 4544129203Scognet shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); 4545129203Scognet return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); 4546129203Scognet 4547129203Scognet} 4548129203Scognet 4549129203Scognet#endif 4550129203Scognet 4551129203Scognet/* 4552129203Scognet------------------------------------------------------------------------------- 4553129203ScognetRounds the quadruple-precision floating-point value `a' to an integer, and 4554129203Scognetreturns the result as a quadruple-precision floating-point value. The 4555129203Scognetoperation is performed according to the IEC/IEEE Standard for Binary 4556129203ScognetFloating-Point Arithmetic. 4557129203Scognet------------------------------------------------------------------------------- 4558129203Scognet*/ 4559129203Scognetfloat128 float128_round_to_int( float128 a ) 4560129203Scognet{ 4561129203Scognet flag aSign; 4562129203Scognet int32 aExp; 4563129203Scognet bits64 lastBitMask, roundBitsMask; 4564129203Scognet int8 roundingMode; 4565129203Scognet float128 z; 4566129203Scognet 4567129203Scognet aExp = extractFloat128Exp( a ); 4568129203Scognet if ( 0x402F <= aExp ) { 4569129203Scognet if ( 0x406F <= aExp ) { 4570129203Scognet if ( ( aExp == 0x7FFF ) 4571129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) 4572129203Scognet ) { 4573129203Scognet return propagateFloat128NaN( a, a ); 4574129203Scognet } 4575129203Scognet return a; 4576129203Scognet } 4577129203Scognet lastBitMask = 1; 4578129203Scognet lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; 4579129203Scognet roundBitsMask = lastBitMask - 1; 4580129203Scognet z = a; 4581129203Scognet roundingMode = float_rounding_mode; 4582129203Scognet if ( roundingMode == float_round_nearest_even ) { 4583129203Scognet if ( lastBitMask ) { 4584129203Scognet add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 4585129203Scognet if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 4586129203Scognet } 4587129203Scognet else { 4588129203Scognet if ( (sbits64) z.low < 0 ) { 4589129203Scognet ++z.high; 4590129203Scognet if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; 4591129203Scognet } 4592129203Scognet } 4593129203Scognet } 4594129203Scognet else if ( roundingMode != float_round_to_zero ) { 4595129203Scognet if ( extractFloat128Sign( z ) 4596129203Scognet ^ ( roundingMode == float_round_up ) ) { 4597129203Scognet add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 4598129203Scognet } 4599129203Scognet } 4600129203Scognet z.low &= ~ roundBitsMask; 4601129203Scognet } 4602129203Scognet else { 4603129203Scognet if ( aExp < 0x3FFF ) { 4604129203Scognet if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 4605129203Scognet float_exception_flags |= float_flag_inexact; 4606129203Scognet aSign = extractFloat128Sign( a ); 4607129203Scognet switch ( float_rounding_mode ) { 4608129203Scognet case float_round_nearest_even: 4609129203Scognet if ( ( aExp == 0x3FFE ) 4610129203Scognet && ( extractFloat128Frac0( a ) 4611129203Scognet | extractFloat128Frac1( a ) ) 4612129203Scognet ) { 4613129203Scognet return packFloat128( aSign, 0x3FFF, 0, 0 ); 4614129203Scognet } 4615129203Scognet break; 4616129203Scognet case float_round_to_zero: 4617129203Scognet break; 4618129203Scognet case float_round_down: 4619129203Scognet return 4620129203Scognet aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) 4621129203Scognet : packFloat128( 0, 0, 0, 0 ); 4622129203Scognet case float_round_up: 4623129203Scognet return 4624129203Scognet aSign ? packFloat128( 1, 0, 0, 0 ) 4625129203Scognet : packFloat128( 0, 0x3FFF, 0, 0 ); 4626129203Scognet } 4627129203Scognet return packFloat128( aSign, 0, 0, 0 ); 4628129203Scognet } 4629129203Scognet lastBitMask = 1; 4630129203Scognet lastBitMask <<= 0x402F - aExp; 4631129203Scognet roundBitsMask = lastBitMask - 1; 4632129203Scognet z.low = 0; 4633129203Scognet z.high = a.high; 4634129203Scognet roundingMode = float_rounding_mode; 4635129203Scognet if ( roundingMode == float_round_nearest_even ) { 4636129203Scognet z.high += lastBitMask>>1; 4637129203Scognet if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 4638129203Scognet z.high &= ~ lastBitMask; 4639129203Scognet } 4640129203Scognet } 4641129203Scognet else if ( roundingMode != float_round_to_zero ) { 4642129203Scognet if ( extractFloat128Sign( z ) 4643129203Scognet ^ ( roundingMode == float_round_up ) ) { 4644129203Scognet z.high |= ( a.low != 0 ); 4645129203Scognet z.high += roundBitsMask; 4646129203Scognet } 4647129203Scognet } 4648129203Scognet z.high &= ~ roundBitsMask; 4649129203Scognet } 4650129203Scognet if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 4651129203Scognet float_exception_flags |= float_flag_inexact; 4652129203Scognet } 4653129203Scognet return z; 4654129203Scognet 4655129203Scognet} 4656129203Scognet 4657129203Scognet/* 4658129203Scognet------------------------------------------------------------------------------- 4659129203ScognetReturns the result of adding the absolute values of the quadruple-precision 4660129203Scognetfloating-point values `a' and `b'. If `zSign' is 1, the sum is negated 4661129203Scognetbefore being returned. `zSign' is ignored if the result is a NaN. 4662129203ScognetThe addition is performed according to the IEC/IEEE Standard for Binary 4663129203ScognetFloating-Point Arithmetic. 4664129203Scognet------------------------------------------------------------------------------- 4665129203Scognet*/ 4666129203Scognetstatic float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) 4667129203Scognet{ 4668129203Scognet int32 aExp, bExp, zExp; 4669129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 4670129203Scognet int32 expDiff; 4671129203Scognet 4672129203Scognet aSig1 = extractFloat128Frac1( a ); 4673129203Scognet aSig0 = extractFloat128Frac0( a ); 4674129203Scognet aExp = extractFloat128Exp( a ); 4675129203Scognet bSig1 = extractFloat128Frac1( b ); 4676129203Scognet bSig0 = extractFloat128Frac0( b ); 4677129203Scognet bExp = extractFloat128Exp( b ); 4678129203Scognet expDiff = aExp - bExp; 4679129203Scognet if ( 0 < expDiff ) { 4680129203Scognet if ( aExp == 0x7FFF ) { 4681129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4682129203Scognet return a; 4683129203Scognet } 4684129203Scognet if ( bExp == 0 ) { 4685129203Scognet --expDiff; 4686129203Scognet } 4687129203Scognet else { 4688129203Scognet bSig0 |= LIT64( 0x0001000000000000 ); 4689129203Scognet } 4690129203Scognet shift128ExtraRightJamming( 4691129203Scognet bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 4692129203Scognet zExp = aExp; 4693129203Scognet } 4694129203Scognet else if ( expDiff < 0 ) { 4695129203Scognet if ( bExp == 0x7FFF ) { 4696129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4697129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 4698129203Scognet } 4699129203Scognet if ( aExp == 0 ) { 4700129203Scognet ++expDiff; 4701129203Scognet } 4702129203Scognet else { 4703129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4704129203Scognet } 4705129203Scognet shift128ExtraRightJamming( 4706129203Scognet aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 4707129203Scognet zExp = bExp; 4708129203Scognet } 4709129203Scognet else { 4710129203Scognet if ( aExp == 0x7FFF ) { 4711129203Scognet if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4712129203Scognet return propagateFloat128NaN( a, b ); 4713129203Scognet } 4714129203Scognet return a; 4715129203Scognet } 4716129203Scognet add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4717129203Scognet if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); 4718129203Scognet zSig2 = 0; 4719129203Scognet zSig0 |= LIT64( 0x0002000000000000 ); 4720129203Scognet zExp = aExp; 4721129203Scognet goto shiftRight1; 4722129203Scognet } 4723129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4724129203Scognet add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4725129203Scognet --zExp; 4726129203Scognet if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; 4727129203Scognet ++zExp; 4728129203Scognet shiftRight1: 4729129203Scognet shift128ExtraRightJamming( 4730129203Scognet zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 4731129203Scognet roundAndPack: 4732129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 4733129203Scognet 4734129203Scognet} 4735129203Scognet 4736129203Scognet/* 4737129203Scognet------------------------------------------------------------------------------- 4738129203ScognetReturns the result of subtracting the absolute values of the quadruple- 4739129203Scognetprecision floating-point values `a' and `b'. If `zSign' is 1, the 4740129203Scognetdifference is negated before being returned. `zSign' is ignored if the 4741129203Scognetresult is a NaN. The subtraction is performed according to the IEC/IEEE 4742129203ScognetStandard for Binary Floating-Point Arithmetic. 4743129203Scognet------------------------------------------------------------------------------- 4744129203Scognet*/ 4745129203Scognetstatic float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) 4746129203Scognet{ 4747129203Scognet int32 aExp, bExp, zExp; 4748129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 4749129203Scognet int32 expDiff; 4750129203Scognet float128 z; 4751129203Scognet 4752129203Scognet aSig1 = extractFloat128Frac1( a ); 4753129203Scognet aSig0 = extractFloat128Frac0( a ); 4754129203Scognet aExp = extractFloat128Exp( a ); 4755129203Scognet bSig1 = extractFloat128Frac1( b ); 4756129203Scognet bSig0 = extractFloat128Frac0( b ); 4757129203Scognet bExp = extractFloat128Exp( b ); 4758129203Scognet expDiff = aExp - bExp; 4759129203Scognet shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4760129203Scognet shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); 4761129203Scognet if ( 0 < expDiff ) goto aExpBigger; 4762129203Scognet if ( expDiff < 0 ) goto bExpBigger; 4763129203Scognet if ( aExp == 0x7FFF ) { 4764129203Scognet if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4765129203Scognet return propagateFloat128NaN( a, b ); 4766129203Scognet } 4767129203Scognet float_raise( float_flag_invalid ); 4768129203Scognet z.low = float128_default_nan_low; 4769129203Scognet z.high = float128_default_nan_high; 4770129203Scognet return z; 4771129203Scognet } 4772129203Scognet if ( aExp == 0 ) { 4773129203Scognet aExp = 1; 4774129203Scognet bExp = 1; 4775129203Scognet } 4776129203Scognet if ( bSig0 < aSig0 ) goto aBigger; 4777129203Scognet if ( aSig0 < bSig0 ) goto bBigger; 4778129203Scognet if ( bSig1 < aSig1 ) goto aBigger; 4779129203Scognet if ( aSig1 < bSig1 ) goto bBigger; 4780129203Scognet return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); 4781129203Scognet bExpBigger: 4782129203Scognet if ( bExp == 0x7FFF ) { 4783129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4784129203Scognet return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); 4785129203Scognet } 4786129203Scognet if ( aExp == 0 ) { 4787129203Scognet ++expDiff; 4788129203Scognet } 4789129203Scognet else { 4790129203Scognet aSig0 |= LIT64( 0x4000000000000000 ); 4791129203Scognet } 4792129203Scognet shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 4793129203Scognet bSig0 |= LIT64( 0x4000000000000000 ); 4794129203Scognet bBigger: 4795129203Scognet sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 4796129203Scognet zExp = bExp; 4797129203Scognet zSign ^= 1; 4798129203Scognet goto normalizeRoundAndPack; 4799129203Scognet aExpBigger: 4800129203Scognet if ( aExp == 0x7FFF ) { 4801129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4802129203Scognet return a; 4803129203Scognet } 4804129203Scognet if ( bExp == 0 ) { 4805129203Scognet --expDiff; 4806129203Scognet } 4807129203Scognet else { 4808129203Scognet bSig0 |= LIT64( 0x4000000000000000 ); 4809129203Scognet } 4810129203Scognet shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 4811129203Scognet aSig0 |= LIT64( 0x4000000000000000 ); 4812129203Scognet aBigger: 4813129203Scognet sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4814129203Scognet zExp = aExp; 4815129203Scognet normalizeRoundAndPack: 4816129203Scognet --zExp; 4817129203Scognet return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); 4818129203Scognet 4819129203Scognet} 4820129203Scognet 4821129203Scognet/* 4822129203Scognet------------------------------------------------------------------------------- 4823129203ScognetReturns the result of adding the quadruple-precision floating-point values 4824129203Scognet`a' and `b'. The operation is performed according to the IEC/IEEE Standard 4825129203Scognetfor Binary Floating-Point Arithmetic. 4826129203Scognet------------------------------------------------------------------------------- 4827129203Scognet*/ 4828129203Scognetfloat128 float128_add( float128 a, float128 b ) 4829129203Scognet{ 4830129203Scognet flag aSign, bSign; 4831129203Scognet 4832129203Scognet aSign = extractFloat128Sign( a ); 4833129203Scognet bSign = extractFloat128Sign( b ); 4834129203Scognet if ( aSign == bSign ) { 4835129203Scognet return addFloat128Sigs( a, b, aSign ); 4836129203Scognet } 4837129203Scognet else { 4838129203Scognet return subFloat128Sigs( a, b, aSign ); 4839129203Scognet } 4840129203Scognet 4841129203Scognet} 4842129203Scognet 4843129203Scognet/* 4844129203Scognet------------------------------------------------------------------------------- 4845129203ScognetReturns the result of subtracting the quadruple-precision floating-point 4846129203Scognetvalues `a' and `b'. The operation is performed according to the IEC/IEEE 4847129203ScognetStandard for Binary Floating-Point Arithmetic. 4848129203Scognet------------------------------------------------------------------------------- 4849129203Scognet*/ 4850129203Scognetfloat128 float128_sub( float128 a, float128 b ) 4851129203Scognet{ 4852129203Scognet flag aSign, bSign; 4853129203Scognet 4854129203Scognet aSign = extractFloat128Sign( a ); 4855129203Scognet bSign = extractFloat128Sign( b ); 4856129203Scognet if ( aSign == bSign ) { 4857129203Scognet return subFloat128Sigs( a, b, aSign ); 4858129203Scognet } 4859129203Scognet else { 4860129203Scognet return addFloat128Sigs( a, b, aSign ); 4861129203Scognet } 4862129203Scognet 4863129203Scognet} 4864129203Scognet 4865129203Scognet/* 4866129203Scognet------------------------------------------------------------------------------- 4867129203ScognetReturns the result of multiplying the quadruple-precision floating-point 4868129203Scognetvalues `a' and `b'. The operation is performed according to the IEC/IEEE 4869129203ScognetStandard for Binary Floating-Point Arithmetic. 4870129203Scognet------------------------------------------------------------------------------- 4871129203Scognet*/ 4872129203Scognetfloat128 float128_mul( float128 a, float128 b ) 4873129203Scognet{ 4874129203Scognet flag aSign, bSign, zSign; 4875129203Scognet int32 aExp, bExp, zExp; 4876129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; 4877129203Scognet float128 z; 4878129203Scognet 4879129203Scognet aSig1 = extractFloat128Frac1( a ); 4880129203Scognet aSig0 = extractFloat128Frac0( a ); 4881129203Scognet aExp = extractFloat128Exp( a ); 4882129203Scognet aSign = extractFloat128Sign( a ); 4883129203Scognet bSig1 = extractFloat128Frac1( b ); 4884129203Scognet bSig0 = extractFloat128Frac0( b ); 4885129203Scognet bExp = extractFloat128Exp( b ); 4886129203Scognet bSign = extractFloat128Sign( b ); 4887129203Scognet zSign = aSign ^ bSign; 4888129203Scognet if ( aExp == 0x7FFF ) { 4889129203Scognet if ( ( aSig0 | aSig1 ) 4890129203Scognet || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { 4891129203Scognet return propagateFloat128NaN( a, b ); 4892129203Scognet } 4893129203Scognet if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; 4894129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 4895129203Scognet } 4896129203Scognet if ( bExp == 0x7FFF ) { 4897129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4898129203Scognet if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 4899129203Scognet invalid: 4900129203Scognet float_raise( float_flag_invalid ); 4901129203Scognet z.low = float128_default_nan_low; 4902129203Scognet z.high = float128_default_nan_high; 4903129203Scognet return z; 4904129203Scognet } 4905129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 4906129203Scognet } 4907129203Scognet if ( aExp == 0 ) { 4908129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 4909129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 4910129203Scognet } 4911129203Scognet if ( bExp == 0 ) { 4912129203Scognet if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 4913129203Scognet normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 4914129203Scognet } 4915129203Scognet zExp = aExp + bExp - 0x4000; 4916129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 4917129203Scognet shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); 4918129203Scognet mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); 4919129203Scognet add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); 4920129203Scognet zSig2 |= ( zSig3 != 0 ); 4921129203Scognet if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { 4922129203Scognet shift128ExtraRightJamming( 4923129203Scognet zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 4924129203Scognet ++zExp; 4925129203Scognet } 4926129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 4927129203Scognet 4928129203Scognet} 4929129203Scognet 4930129203Scognet/* 4931129203Scognet------------------------------------------------------------------------------- 4932129203ScognetReturns the result of dividing the quadruple-precision floating-point value 4933129203Scognet`a' by the corresponding value `b'. The operation is performed according to 4934129203Scognetthe IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4935129203Scognet------------------------------------------------------------------------------- 4936129203Scognet*/ 4937129203Scognetfloat128 float128_div( float128 a, float128 b ) 4938129203Scognet{ 4939129203Scognet flag aSign, bSign, zSign; 4940129203Scognet int32 aExp, bExp, zExp; 4941129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 4942129203Scognet bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 4943129203Scognet float128 z; 4944129203Scognet 4945129203Scognet aSig1 = extractFloat128Frac1( a ); 4946129203Scognet aSig0 = extractFloat128Frac0( a ); 4947129203Scognet aExp = extractFloat128Exp( a ); 4948129203Scognet aSign = extractFloat128Sign( a ); 4949129203Scognet bSig1 = extractFloat128Frac1( b ); 4950129203Scognet bSig0 = extractFloat128Frac0( b ); 4951129203Scognet bExp = extractFloat128Exp( b ); 4952129203Scognet bSign = extractFloat128Sign( b ); 4953129203Scognet zSign = aSign ^ bSign; 4954129203Scognet if ( aExp == 0x7FFF ) { 4955129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4956129203Scognet if ( bExp == 0x7FFF ) { 4957129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4958129203Scognet goto invalid; 4959129203Scognet } 4960129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 4961129203Scognet } 4962129203Scognet if ( bExp == 0x7FFF ) { 4963129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4964129203Scognet return packFloat128( zSign, 0, 0, 0 ); 4965129203Scognet } 4966129203Scognet if ( bExp == 0 ) { 4967129203Scognet if ( ( bSig0 | bSig1 ) == 0 ) { 4968129203Scognet if ( ( aExp | aSig0 | aSig1 ) == 0 ) { 4969129203Scognet invalid: 4970129203Scognet float_raise( float_flag_invalid ); 4971129203Scognet z.low = float128_default_nan_low; 4972129203Scognet z.high = float128_default_nan_high; 4973129203Scognet return z; 4974129203Scognet } 4975129203Scognet float_raise( float_flag_divbyzero ); 4976129203Scognet return packFloat128( zSign, 0x7FFF, 0, 0 ); 4977129203Scognet } 4978129203Scognet normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 4979129203Scognet } 4980129203Scognet if ( aExp == 0 ) { 4981129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); 4982129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 4983129203Scognet } 4984129203Scognet zExp = aExp - bExp + 0x3FFD; 4985129203Scognet shortShift128Left( 4986129203Scognet aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); 4987129203Scognet shortShift128Left( 4988129203Scognet bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); 4989129203Scognet if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { 4990129203Scognet shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); 4991129203Scognet ++zExp; 4992129203Scognet } 4993129203Scognet zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); 4994129203Scognet mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); 4995129203Scognet sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); 4996129203Scognet while ( (sbits64) rem0 < 0 ) { 4997129203Scognet --zSig0; 4998129203Scognet add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); 4999129203Scognet } 5000129203Scognet zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); 5001129203Scognet if ( ( zSig1 & 0x3FFF ) <= 4 ) { 5002129203Scognet mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); 5003129203Scognet sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); 5004129203Scognet while ( (sbits64) rem1 < 0 ) { 5005129203Scognet --zSig1; 5006129203Scognet add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); 5007129203Scognet } 5008129203Scognet zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 5009129203Scognet } 5010129203Scognet shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); 5011129203Scognet return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 5012129203Scognet 5013129203Scognet} 5014129203Scognet 5015129203Scognet/* 5016129203Scognet------------------------------------------------------------------------------- 5017129203ScognetReturns the remainder of the quadruple-precision floating-point value `a' 5018129203Scognetwith respect to the corresponding value `b'. The operation is performed 5019129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5020129203Scognet------------------------------------------------------------------------------- 5021129203Scognet*/ 5022129203Scognetfloat128 float128_rem( float128 a, float128 b ) 5023129203Scognet{ 5024129203Scognet flag aSign, bSign, zSign; 5025129203Scognet int32 aExp, bExp, expDiff; 5026129203Scognet bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; 5027129203Scognet bits64 allZero, alternateASig0, alternateASig1, sigMean1; 5028129203Scognet sbits64 sigMean0; 5029129203Scognet float128 z; 5030129203Scognet 5031129203Scognet aSig1 = extractFloat128Frac1( a ); 5032129203Scognet aSig0 = extractFloat128Frac0( a ); 5033129203Scognet aExp = extractFloat128Exp( a ); 5034129203Scognet aSign = extractFloat128Sign( a ); 5035129203Scognet bSig1 = extractFloat128Frac1( b ); 5036129203Scognet bSig0 = extractFloat128Frac0( b ); 5037129203Scognet bExp = extractFloat128Exp( b ); 5038129203Scognet bSign = extractFloat128Sign( b ); 5039129203Scognet if ( aExp == 0x7FFF ) { 5040129203Scognet if ( ( aSig0 | aSig1 ) 5041129203Scognet || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { 5042129203Scognet return propagateFloat128NaN( a, b ); 5043129203Scognet } 5044129203Scognet goto invalid; 5045129203Scognet } 5046129203Scognet if ( bExp == 0x7FFF ) { 5047129203Scognet if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 5048129203Scognet return a; 5049129203Scognet } 5050129203Scognet if ( bExp == 0 ) { 5051129203Scognet if ( ( bSig0 | bSig1 ) == 0 ) { 5052129203Scognet invalid: 5053129203Scognet float_raise( float_flag_invalid ); 5054129203Scognet z.low = float128_default_nan_low; 5055129203Scognet z.high = float128_default_nan_high; 5056129203Scognet return z; 5057129203Scognet } 5058129203Scognet normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); 5059129203Scognet } 5060129203Scognet if ( aExp == 0 ) { 5061129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return a; 5062129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5063129203Scognet } 5064129203Scognet expDiff = aExp - bExp; 5065129203Scognet if ( expDiff < -1 ) return a; 5066129203Scognet shortShift128Left( 5067129203Scognet aSig0 | LIT64( 0x0001000000000000 ), 5068129203Scognet aSig1, 5069129203Scognet 15 - ( expDiff < 0 ), 5070129203Scognet &aSig0, 5071129203Scognet &aSig1 5072129203Scognet ); 5073129203Scognet shortShift128Left( 5074129203Scognet bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); 5075129203Scognet q = le128( bSig0, bSig1, aSig0, aSig1 ); 5076129203Scognet if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 5077129203Scognet expDiff -= 64; 5078129203Scognet while ( 0 < expDiff ) { 5079129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5080129203Scognet q = ( 4 < q ) ? q - 4 : 0; 5081129203Scognet mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); 5082129203Scognet shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); 5083129203Scognet shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); 5084129203Scognet sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); 5085129203Scognet expDiff -= 61; 5086129203Scognet } 5087129203Scognet if ( -64 < expDiff ) { 5088129203Scognet q = estimateDiv128To64( aSig0, aSig1, bSig0 ); 5089129203Scognet q = ( 4 < q ) ? q - 4 : 0; 5090129203Scognet q >>= - expDiff; 5091129203Scognet shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); 5092129203Scognet expDiff += 52; 5093129203Scognet if ( expDiff < 0 ) { 5094129203Scognet shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 5095129203Scognet } 5096129203Scognet else { 5097129203Scognet shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); 5098129203Scognet } 5099129203Scognet mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); 5100129203Scognet sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); 5101129203Scognet } 5102129203Scognet else { 5103129203Scognet shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); 5104129203Scognet shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); 5105129203Scognet } 5106129203Scognet do { 5107129203Scognet alternateASig0 = aSig0; 5108129203Scognet alternateASig1 = aSig1; 5109129203Scognet ++q; 5110129203Scognet sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); 5111129203Scognet } while ( 0 <= (sbits64) aSig0 ); 5112129203Scognet add128( 5113129203Scognet aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 ); 5114129203Scognet if ( ( sigMean0 < 0 ) 5115129203Scognet || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { 5116129203Scognet aSig0 = alternateASig0; 5117129203Scognet aSig1 = alternateASig1; 5118129203Scognet } 5119129203Scognet zSign = ( (sbits64) aSig0 < 0 ); 5120129203Scognet if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); 5121129203Scognet return 5122129203Scognet normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); 5123129203Scognet 5124129203Scognet} 5125129203Scognet 5126129203Scognet/* 5127129203Scognet------------------------------------------------------------------------------- 5128129203ScognetReturns the square root of the quadruple-precision floating-point value `a'. 5129129203ScognetThe operation is performed according to the IEC/IEEE Standard for Binary 5130129203ScognetFloating-Point Arithmetic. 5131129203Scognet------------------------------------------------------------------------------- 5132129203Scognet*/ 5133129203Scognetfloat128 float128_sqrt( float128 a ) 5134129203Scognet{ 5135129203Scognet flag aSign; 5136129203Scognet int32 aExp, zExp; 5137129203Scognet bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; 5138129203Scognet bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 5139129203Scognet float128 z; 5140129203Scognet 5141129203Scognet aSig1 = extractFloat128Frac1( a ); 5142129203Scognet aSig0 = extractFloat128Frac0( a ); 5143129203Scognet aExp = extractFloat128Exp( a ); 5144129203Scognet aSign = extractFloat128Sign( a ); 5145129203Scognet if ( aExp == 0x7FFF ) { 5146129203Scognet if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); 5147129203Scognet if ( ! aSign ) return a; 5148129203Scognet goto invalid; 5149129203Scognet } 5150129203Scognet if ( aSign ) { 5151129203Scognet if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; 5152129203Scognet invalid: 5153129203Scognet float_raise( float_flag_invalid ); 5154129203Scognet z.low = float128_default_nan_low; 5155129203Scognet z.high = float128_default_nan_high; 5156129203Scognet return z; 5157129203Scognet } 5158129203Scognet if ( aExp == 0 ) { 5159129203Scognet if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); 5160129203Scognet normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 5161129203Scognet } 5162129203Scognet zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; 5163129203Scognet aSig0 |= LIT64( 0x0001000000000000 ); 5164129203Scognet zSig0 = estimateSqrt32( aExp, aSig0>>17 ); 5165129203Scognet shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); 5166129203Scognet zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 5167129203Scognet doubleZSig0 = zSig0<<1; 5168129203Scognet mul64To128( zSig0, zSig0, &term0, &term1 ); 5169129203Scognet sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 5170129203Scognet while ( (sbits64) rem0 < 0 ) { 5171129203Scognet --zSig0; 5172129203Scognet doubleZSig0 -= 2; 5173129203Scognet add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 5174129203Scognet } 5175129203Scognet zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 5176129203Scognet if ( ( zSig1 & 0x1FFF ) <= 5 ) { 5177129203Scognet if ( zSig1 == 0 ) zSig1 = 1; 5178129203Scognet mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 5179129203Scognet sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 5180129203Scognet mul64To128( zSig1, zSig1, &term2, &term3 ); 5181129203Scognet sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 5182129203Scognet while ( (sbits64) rem1 < 0 ) { 5183129203Scognet --zSig1; 5184129203Scognet shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 5185129203Scognet term3 |= 1; 5186129203Scognet term2 |= doubleZSig0; 5187129203Scognet add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 5188129203Scognet } 5189129203Scognet zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 5190129203Scognet } 5191129203Scognet shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); 5192129203Scognet return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); 5193129203Scognet 5194129203Scognet} 5195129203Scognet 5196129203Scognet/* 5197129203Scognet------------------------------------------------------------------------------- 5198129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to 5199129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 5200129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5201129203Scognet------------------------------------------------------------------------------- 5202129203Scognet*/ 5203129203Scognetflag float128_eq( float128 a, float128 b ) 5204129203Scognet{ 5205129203Scognet 5206129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5207129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5208129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5209129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5210129203Scognet ) { 5211129203Scognet if ( float128_is_signaling_nan( a ) 5212129203Scognet || float128_is_signaling_nan( b ) ) { 5213129203Scognet float_raise( float_flag_invalid ); 5214129203Scognet } 5215129203Scognet return 0; 5216129203Scognet } 5217129203Scognet return 5218129203Scognet ( a.low == b.low ) 5219129203Scognet && ( ( a.high == b.high ) 5220129203Scognet || ( ( a.low == 0 ) 5221129203Scognet && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) 5222129203Scognet ); 5223129203Scognet 5224129203Scognet} 5225129203Scognet 5226129203Scognet/* 5227129203Scognet------------------------------------------------------------------------------- 5228129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5229129203Scognetor equal to the corresponding value `b', and 0 otherwise. The comparison 5230129203Scognetis performed according to the IEC/IEEE Standard for Binary Floating-Point 5231129203ScognetArithmetic. 5232129203Scognet------------------------------------------------------------------------------- 5233129203Scognet*/ 5234129203Scognetflag float128_le( float128 a, float128 b ) 5235129203Scognet{ 5236129203Scognet flag aSign, bSign; 5237129203Scognet 5238129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5239129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5240129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5241129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5242129203Scognet ) { 5243129203Scognet float_raise( float_flag_invalid ); 5244129203Scognet return 0; 5245129203Scognet } 5246129203Scognet aSign = extractFloat128Sign( a ); 5247129203Scognet bSign = extractFloat128Sign( b ); 5248129203Scognet if ( aSign != bSign ) { 5249129203Scognet return 5250129203Scognet aSign 5251129203Scognet || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5252129203Scognet == 0 ); 5253129203Scognet } 5254129203Scognet return 5255129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 5256129203Scognet : le128( a.high, a.low, b.high, b.low ); 5257129203Scognet 5258129203Scognet} 5259129203Scognet 5260129203Scognet/* 5261129203Scognet------------------------------------------------------------------------------- 5262129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5263129203Scognetthe corresponding value `b', and 0 otherwise. The comparison is performed 5264129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5265129203Scognet------------------------------------------------------------------------------- 5266129203Scognet*/ 5267129203Scognetflag float128_lt( float128 a, float128 b ) 5268129203Scognet{ 5269129203Scognet flag aSign, bSign; 5270129203Scognet 5271129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5272129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5273129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5274129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5275129203Scognet ) { 5276129203Scognet float_raise( float_flag_invalid ); 5277129203Scognet return 0; 5278129203Scognet } 5279129203Scognet aSign = extractFloat128Sign( a ); 5280129203Scognet bSign = extractFloat128Sign( b ); 5281129203Scognet if ( aSign != bSign ) { 5282129203Scognet return 5283129203Scognet aSign 5284129203Scognet && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5285129203Scognet != 0 ); 5286129203Scognet } 5287129203Scognet return 5288129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 5289129203Scognet : lt128( a.high, a.low, b.high, b.low ); 5290129203Scognet 5291129203Scognet} 5292129203Scognet 5293129203Scognet/* 5294129203Scognet------------------------------------------------------------------------------- 5295129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is equal to 5296129203Scognetthe corresponding value `b', and 0 otherwise. The invalid exception is 5297129203Scognetraised if either operand is a NaN. Otherwise, the comparison is performed 5298129203Scognetaccording to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 5299129203Scognet------------------------------------------------------------------------------- 5300129203Scognet*/ 5301129203Scognetflag float128_eq_signaling( float128 a, float128 b ) 5302129203Scognet{ 5303129203Scognet 5304129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5305129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5306129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5307129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5308129203Scognet ) { 5309129203Scognet float_raise( float_flag_invalid ); 5310129203Scognet return 0; 5311129203Scognet } 5312129203Scognet return 5313129203Scognet ( a.low == b.low ) 5314129203Scognet && ( ( a.high == b.high ) 5315129203Scognet || ( ( a.low == 0 ) 5316129203Scognet && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) 5317129203Scognet ); 5318129203Scognet 5319129203Scognet} 5320129203Scognet 5321129203Scognet/* 5322129203Scognet------------------------------------------------------------------------------- 5323129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5324129203Scognetor equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 5325129203Scognetcause an exception. Otherwise, the comparison is performed according to the 5326129203ScognetIEC/IEEE Standard for Binary Floating-Point Arithmetic. 5327129203Scognet------------------------------------------------------------------------------- 5328129203Scognet*/ 5329129203Scognetflag float128_le_quiet( float128 a, float128 b ) 5330129203Scognet{ 5331129203Scognet flag aSign, bSign; 5332129203Scognet 5333129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5334129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5335129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5336129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5337129203Scognet ) { 5338129203Scognet if ( float128_is_signaling_nan( a ) 5339129203Scognet || float128_is_signaling_nan( b ) ) { 5340129203Scognet float_raise( float_flag_invalid ); 5341129203Scognet } 5342129203Scognet return 0; 5343129203Scognet } 5344129203Scognet aSign = extractFloat128Sign( a ); 5345129203Scognet bSign = extractFloat128Sign( b ); 5346129203Scognet if ( aSign != bSign ) { 5347129203Scognet return 5348129203Scognet aSign 5349129203Scognet || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5350129203Scognet == 0 ); 5351129203Scognet } 5352129203Scognet return 5353129203Scognet aSign ? le128( b.high, b.low, a.high, a.low ) 5354129203Scognet : le128( a.high, a.low, b.high, b.low ); 5355129203Scognet 5356129203Scognet} 5357129203Scognet 5358129203Scognet/* 5359129203Scognet------------------------------------------------------------------------------- 5360129203ScognetReturns 1 if the quadruple-precision floating-point value `a' is less than 5361129203Scognetthe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 5362129203Scognetexception. Otherwise, the comparison is performed according to the IEC/IEEE 5363129203ScognetStandard for Binary Floating-Point Arithmetic. 5364129203Scognet------------------------------------------------------------------------------- 5365129203Scognet*/ 5366129203Scognetflag float128_lt_quiet( float128 a, float128 b ) 5367129203Scognet{ 5368129203Scognet flag aSign, bSign; 5369129203Scognet 5370129203Scognet if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) 5371129203Scognet && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) 5372129203Scognet || ( ( extractFloat128Exp( b ) == 0x7FFF ) 5373129203Scognet && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) 5374129203Scognet ) { 5375129203Scognet if ( float128_is_signaling_nan( a ) 5376129203Scognet || float128_is_signaling_nan( b ) ) { 5377129203Scognet float_raise( float_flag_invalid ); 5378129203Scognet } 5379129203Scognet return 0; 5380129203Scognet } 5381129203Scognet aSign = extractFloat128Sign( a ); 5382129203Scognet bSign = extractFloat128Sign( b ); 5383129203Scognet if ( aSign != bSign ) { 5384129203Scognet return 5385129203Scognet aSign 5386129203Scognet && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 5387129203Scognet != 0 ); 5388129203Scognet } 5389129203Scognet return 5390129203Scognet aSign ? lt128( b.high, b.low, a.high, a.low ) 5391129203Scognet : lt128( a.high, a.low, b.high, b.low ); 5392129203Scognet 5393129203Scognet} 5394129203Scognet 5395129203Scognet#endif 5396129203Scognet 5397129203Scognet 5398129203Scognet#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS) 5399129203Scognet 5400129203Scognet/* 5401129203Scognet * These two routines are not part of the original softfloat distribution. 5402129203Scognet * 5403129203Scognet * They are based on the corresponding conversions to integer but return 5404129203Scognet * unsigned numbers instead since these functions are required by GCC. 5405129203Scognet * 5406129203Scognet * Added by Mark Brinicombe <mark@NetBSD.org> 27/09/97 5407129203Scognet * 5408129203Scognet * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15] 5409129203Scognet */ 5410129203Scognet 5411129203Scognet/* 5412129203Scognet------------------------------------------------------------------------------- 5413129203ScognetReturns the result of converting the double-precision floating-point value 5414129203Scognet`a' to the 32-bit unsigned integer format. The conversion is 5415129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point 5416129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 5417129203Scognet`a' is a NaN, the largest positive integer is returned. If the conversion 5418129203Scognetoverflows, the largest integer positive is returned. 5419129203Scognet------------------------------------------------------------------------------- 5420129203Scognet*/ 5421129203Scognetuint32 float64_to_uint32_round_to_zero( float64 a ) 5422129203Scognet{ 5423129203Scognet flag aSign; 5424129203Scognet int16 aExp, shiftCount; 5425129203Scognet bits64 aSig, savedASig; 5426129203Scognet uint32 z; 5427129203Scognet 5428129203Scognet aSig = extractFloat64Frac( a ); 5429129203Scognet aExp = extractFloat64Exp( a ); 5430129203Scognet aSign = extractFloat64Sign( a ); 5431129203Scognet 5432129203Scognet if (aSign) { 5433129203Scognet float_raise( float_flag_invalid ); 5434129203Scognet return(0); 5435129203Scognet } 5436129203Scognet 5437129203Scognet if ( 0x41E < aExp ) { 5438129203Scognet float_raise( float_flag_invalid ); 5439129203Scognet return 0xffffffff; 5440129203Scognet } 5441129203Scognet else if ( aExp < 0x3FF ) { 5442129203Scognet if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; 5443129203Scognet return 0; 5444129203Scognet } 5445129203Scognet aSig |= LIT64( 0x0010000000000000 ); 5446129203Scognet shiftCount = 0x433 - aExp; 5447129203Scognet savedASig = aSig; 5448129203Scognet aSig >>= shiftCount; 5449129203Scognet z = aSig; 5450129203Scognet if ( ( aSig<<shiftCount ) != savedASig ) { 5451129203Scognet float_exception_flags |= float_flag_inexact; 5452129203Scognet } 5453129203Scognet return z; 5454129203Scognet 5455129203Scognet} 5456129203Scognet 5457129203Scognet/* 5458129203Scognet------------------------------------------------------------------------------- 5459129203ScognetReturns the result of converting the single-precision floating-point value 5460129203Scognet`a' to the 32-bit unsigned integer format. The conversion is 5461129203Scognetperformed according to the IEC/IEEE Standard for Binary Floating-point 5462129203ScognetArithmetic, except that the conversion is always rounded toward zero. If 5463129203Scognet`a' is a NaN, the largest positive integer is returned. If the conversion 5464129203Scognetoverflows, the largest positive integer is returned. 5465129203Scognet------------------------------------------------------------------------------- 5466129203Scognet*/ 5467129203Scognetuint32 float32_to_uint32_round_to_zero( float32 a ) 5468129203Scognet{ 5469129203Scognet flag aSign; 5470129203Scognet int16 aExp, shiftCount; 5471129203Scognet bits32 aSig; 5472129203Scognet uint32 z; 5473129203Scognet 5474129203Scognet aSig = extractFloat32Frac( a ); 5475129203Scognet aExp = extractFloat32Exp( a ); 5476129203Scognet aSign = extractFloat32Sign( a ); 5477129203Scognet shiftCount = aExp - 0x9E; 5478129203Scognet 5479129203Scognet if (aSign) { 5480129203Scognet float_raise( float_flag_invalid ); 5481129203Scognet return(0); 5482129203Scognet } 5483129203Scognet if ( 0 < shiftCount ) { 5484129203Scognet float_raise( float_flag_invalid ); 5485129203Scognet return 0xFFFFFFFF; 5486129203Scognet } 5487129203Scognet else if ( aExp <= 0x7E ) { 5488129203Scognet if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; 5489129203Scognet return 0; 5490129203Scognet } 5491129203Scognet aSig = ( aSig | 0x800000 )<<8; 5492129203Scognet z = aSig>>( - shiftCount ); 5493129203Scognet if ( aSig<<( shiftCount & 31 ) ) { 5494129203Scognet float_exception_flags |= float_flag_inexact; 5495129203Scognet } 5496129203Scognet return z; 5497129203Scognet 5498129203Scognet} 5499129203Scognet 5500129203Scognet#endif 5501