1
2/*
3===============================================================================
4
5This C source file is part of TestFloat, Release 2a, a package of programs
6for testing the correctness of floating-point arithmetic complying to the
7IEC/IEEE Standard for Floating-Point.
8
9Written by John R. Hauser.  More information is available through the Web
10page `http://HTTP.CS.Berkeley.EDU/~jhauser/arithmetic/TestFloat.html'.
11
12THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
13has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
14TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
15PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
16AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
17
18Derivative works are acceptable, even for commercial purposes, so long as
19(1) they include prominent notice that the work is derivative, and (2) they
20include prominent notice akin to these four paragraphs for those parts of
21this code that are retained.
22
23===============================================================================
24*/
25
26int8 slow_float_rounding_mode;
27int8 slow_float_exception_flags;
28int8 slow_float_detect_tininess;
29
30typedef struct {
31    bits32 a0, a1;
32} bits64X;
33
34typedef struct {
35    flag isNaN;
36    flag isInf;
37    flag isZero;
38    flag sign;
39    int16 exp;
40    bits64X sig;
41} floatX;
42
43static const floatX floatXNaN = { TRUE, FALSE, FALSE, FALSE, 0, { 0, 0 } };
44static const floatX floatXPositiveZero =
45    { FALSE, FALSE, TRUE, FALSE, 0, { 0, 0 } };
46static const floatX floatXNegativeZero =
47    { FALSE, FALSE, TRUE, TRUE, 0, { 0, 0 } };
48
49static bits64X shortShift64Left( bits64X a, int8 shiftCount )
50{
51    int8 negShiftCount;
52
53    negShiftCount = ( - shiftCount & 31 );
54    a.a0 = ( a.a0<<shiftCount ) | ( a.a1>>negShiftCount );
55    a.a1 <<= shiftCount;
56    return a;
57
58}
59
60static bits64X shortShift64RightJamming( bits64X a, int8 shiftCount )
61{
62    int8 negShiftCount;
63    bits32 extra;
64
65    negShiftCount = ( - shiftCount & 31 );
66    extra = a.a1<<negShiftCount;
67    a.a1 = ( a.a0<<negShiftCount ) | ( a.a1>>shiftCount ) | ( extra != 0 );
68    a.a0 >>= shiftCount;
69    return a;
70
71}
72
73static bits64X neg64( bits64X a )
74{
75
76    if ( a.a1 == 0 ) {
77        a.a0 = - a.a0;
78    }
79    else {
80        a.a1 = - a.a1;
81        a.a0 = ~ a.a0;
82    }
83    return a;
84
85}
86
87static bits64X add64( bits64X a, bits64X b )
88{
89
90    a.a1 += b.a1;
91    a.a0 += b.a0 + ( a.a1 < b.a1 );
92    return a;
93
94}
95
96static flag eq64( bits64X a, bits64X b )
97{
98
99    return ( a.a0 == b.a0 ) && ( a.a1 == b.a1 );
100
101}
102
103static flag le64( bits64X a, bits64X b )
104{
105
106    return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 <= b.a1 ) );
107
108}
109
110static flag lt64( bits64X a, bits64X b )
111{
112
113    return ( a.a0 < b.a0 ) || ( ( a.a0 == b.a0 ) && ( a.a1 < b.a1 ) );
114
115}
116
117static floatX roundFloatXTo24( flag isTiny, floatX zx )
118{
119
120    if ( zx.sig.a1 ) {
121        slow_float_exception_flags |= float_flag_inexact;
122        if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
123        switch ( slow_float_rounding_mode ) {
124         case float_round_nearest_even:
125            if ( zx.sig.a1 < 0x80000000 ) goto noIncrement;
126            if ( ( zx.sig.a1 == 0x80000000 ) && ! ( zx.sig.a0 & 1 ) ) {
127                goto noIncrement;
128            }
129            break;
130         case float_round_to_zero:
131            goto noIncrement;
132         case float_round_down:
133            if ( ! zx.sign ) goto noIncrement;
134            break;
135         case float_round_up:
136            if ( zx.sign ) goto noIncrement;
137            break;
138        }
139        ++zx.sig.a0;
140        if ( zx.sig.a0 == 0x01000000 ) {
141            zx.sig.a0 = 0x00800000;
142            ++zx.exp;
143        }
144    }
145 noIncrement:
146    zx.sig.a1 = 0;
147    return zx;
148
149}
150
151static floatX roundFloatXTo53( flag isTiny, floatX zx )
152{
153    int8 roundBits;
154
155    roundBits = zx.sig.a1 & 7;
156    zx.sig.a1 -= roundBits;
157    if ( roundBits ) {
158        slow_float_exception_flags |= float_flag_inexact;
159        if ( isTiny ) slow_float_exception_flags |= float_flag_underflow;
160        switch ( slow_float_rounding_mode ) {
161         case float_round_nearest_even:
162            if ( roundBits < 4 ) goto noIncrement;
163            if ( ( roundBits == 4 ) && ! ( zx.sig.a1 & 8 ) ) goto noIncrement;
164            break;
165         case float_round_to_zero:
166            goto noIncrement;
167         case float_round_down:
168            if ( ! zx.sign ) goto noIncrement;
169            break;
170         case float_round_up:
171            if ( zx.sign ) goto noIncrement;
172            break;
173        }
174        zx.sig.a1 += 8;
175        zx.sig.a0 += ( zx.sig.a1 == 0 );
176        if ( zx.sig.a0 == 0x01000000 ) {
177            zx.sig.a0 = 0x00800000;
178            ++zx.exp;
179        }
180    }
181 noIncrement:
182    return zx;
183
184}
185
186static floatX int32ToFloatX( int32 a )
187{
188    floatX ax;
189
190    ax.isNaN = FALSE;
191    ax.isInf = FALSE;
192    ax.sign = ( a < 0 );
193    ax.sig.a1 = ax.sign ? - a : a;
194    ax.sig.a0 = 0;
195    if ( a == 0 ) {
196        ax.isZero = TRUE;
197        return ax;
198    }
199    ax.isZero = FALSE;
200    ax.sig = shortShift64Left( ax.sig, 23 );
201    ax.exp = 32;
202    while ( ax.sig.a0 < 0x00800000 ) {
203        ax.sig = shortShift64Left( ax.sig, 1 );
204        --ax.exp;
205    }
206    return ax;
207
208}
209
210static int32 floatXToInt32( floatX ax )
211{
212    int8 savedExceptionFlags;
213    int16 shiftCount;
214    int32 z;
215
216    if ( ax.isInf || ax.isNaN ) {
217        slow_float_exception_flags |= float_flag_invalid;
218        return ( ax.isInf & ax.sign ) ? 0x80000000 : 0x7FFFFFFF;
219    }
220    if ( ax.isZero ) return 0;
221    savedExceptionFlags = slow_float_exception_flags;
222    shiftCount = 52 - ax.exp;
223    if ( 56 < shiftCount ) {
224        ax.sig.a1 = 1;
225        ax.sig.a0 = 0;
226    }
227    else {
228        while ( 0 < shiftCount ) {
229            ax.sig = shortShift64RightJamming( ax.sig, 1 );
230            --shiftCount;
231        }
232    }
233    ax = roundFloatXTo53( FALSE, ax );
234    ax.sig = shortShift64RightJamming( ax.sig, 3 );
235    z = ax.sig.a1;
236    if ( ax.sign ) z = - z;
237    if (    ( shiftCount < 0 )
238         || ax.sig.a0
239         || ( ( z != 0 ) && ( ( ax.sign ^ ( z < 0 ) ) != 0 ) )
240       ) {
241        slow_float_exception_flags = savedExceptionFlags | float_flag_invalid;
242        return ax.sign ? 0x80000000 : 0x7FFFFFFF;
243    }
244    return z;
245
246}
247
248static floatX float32ToFloatX( float32 a )
249{
250    int16 expField;
251    floatX ax;
252
253    ax.isNaN = FALSE;
254    ax.isInf = FALSE;
255    ax.isZero = FALSE;
256    ax.sign = ( ( a & 0x80000000 ) != 0 );
257    expField = ( a>>23 ) & 0xFF;
258    ax.sig.a1 = 0;
259    ax.sig.a0 = a & 0x007FFFFF;
260    if ( expField == 0 ) {
261        if ( ax.sig.a0 == 0 ) {
262            ax.isZero = TRUE;
263        }
264        else {
265            expField = 1 - 0x7F;
266            do {
267                ax.sig.a0 <<= 1;
268                --expField;
269            } while ( ax.sig.a0 < 0x00800000 );
270            ax.exp = expField;
271        }
272    }
273    else if ( expField == 0xFF ) {
274        if ( ax.sig.a0 == 0 ) {
275            ax.isInf = TRUE;
276        }
277        else {
278            ax.isNaN = TRUE;
279        }
280    }
281    else {
282        ax.sig.a0 |= 0x00800000;
283        ax.exp = expField - 0x7F;
284    }
285    return ax;
286
287}
288
289static float32 floatXToFloat32( floatX zx )
290{
291    floatX savedZ;
292    flag isTiny;
293    int16 expField;
294    float32 z;
295
296    if ( zx.isZero ) return zx.sign ? 0x80000000 : 0;
297    if ( zx.isInf ) return zx.sign ? 0xFF800000 : 0x7F800000;
298    if ( zx.isNaN ) return 0xFFFFFFFF;
299    while ( 0x01000000 <= zx.sig.a0 ) {
300        zx.sig = shortShift64RightJamming( zx.sig, 1 );
301        ++zx.exp;
302    }
303    while ( zx.sig.a0 < 0x00800000 ) {
304        zx.sig = shortShift64Left( zx.sig, 1 );
305        --zx.exp;
306    }
307    savedZ = zx;
308    isTiny =
309           ( slow_float_detect_tininess == float_tininess_before_rounding )
310        && ( zx.exp + 0x7F <= 0 );
311    zx = roundFloatXTo24( isTiny, zx );
312    expField = zx.exp + 0x7F;
313    if ( 0xFF <= expField ) {
314        slow_float_exception_flags |=
315            float_flag_overflow | float_flag_inexact;
316        if ( zx.sign ) {
317            switch ( slow_float_rounding_mode ) {
318             case float_round_nearest_even:
319             case float_round_down:
320                z = 0xFF800000;
321                break;
322             case float_round_to_zero:
323             case float_round_up:
324                z = 0xFF7FFFFF;
325                break;
326            }
327        }
328        else {
329            switch ( slow_float_rounding_mode ) {
330             case float_round_nearest_even:
331             case float_round_up:
332                z = 0x7F800000;
333                break;
334             case float_round_to_zero:
335             case float_round_down:
336                z = 0x7F7FFFFF;
337                break;
338            }
339        }
340        return z;
341    }
342    if ( expField <= 0 ) {
343        isTiny = TRUE;
344        zx = savedZ;
345        expField = zx.exp + 0x7F;
346        if ( expField < -27 ) {
347            zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
348            zx.sig.a0 = 0;
349        }
350        else {
351            while ( expField <= 0 ) {
352                zx.sig = shortShift64RightJamming( zx.sig, 1 );
353                ++expField;
354            }
355        }
356        zx = roundFloatXTo24( isTiny, zx );
357        expField = ( 0x00800000 <= zx.sig.a0 ) ? 1 : 0;
358    }
359    z = expField;
360    z <<= 23;
361    if ( zx.sign ) z |= 0x80000000;
362    z |= zx.sig.a0 & 0x007FFFFF;
363    return z;
364
365}
366
367static floatX float64ToFloatX( float64 a )
368{
369    int16 expField;
370    floatX ax;
371
372    ax.isNaN = FALSE;
373    ax.isInf = FALSE;
374    ax.isZero = FALSE;
375#ifdef BITS64
376    ax.sign = ( ( a & LIT64( 0x8000000000000000 ) ) != 0 );
377    expField = ( a>>52 ) & 0x7FF;
378    ax.sig.a1 = a;
379    ax.sig.a0 = ( a>>32 ) & 0x000FFFFF;
380#else
381    ax.sign = ( ( a.high & 0x80000000 ) != 0 );
382    expField = ( a.high>>( 52 - 32 ) ) & 0x7FF;
383    ax.sig.a1 = a.low;
384    ax.sig.a0 = a.high & 0x000FFFFF;
385#endif
386    if ( expField == 0 ) {
387        if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) {
388            ax.isZero = TRUE;
389        }
390        else {
391            expField = 1 - 0x3FF;
392            do {
393                ax.sig = shortShift64Left( ax.sig, 1 );
394                --expField;
395            } while ( ax.sig.a0 < 0x00100000 );
396            ax.exp = expField;
397        }
398    }
399    else if ( expField == 0x7FF ) {
400        if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) {
401            ax.isInf = TRUE;
402        }
403        else {
404            ax.isNaN = TRUE;
405        }
406    }
407    else {
408        ax.exp = expField - 0x3FF;
409        ax.sig.a0 |= 0x00100000;
410    }
411    ax.sig = shortShift64Left( ax.sig, 3 );
412    return ax;
413
414}
415
416static float64 floatXToFloat64( floatX zx )
417{
418    floatX savedZ;
419    flag isTiny;
420    int16 expField;
421    float64 z;
422
423#ifdef BITS64
424    if ( zx.isZero ) return zx.sign ? LIT64( 0x8000000000000000 ) : 0;
425    if ( zx.isInf ) {
426        return
427              zx.sign ? LIT64( 0xFFF0000000000000 )
428            : LIT64( 0x7FF0000000000000 );
429    }
430    if ( zx.isNaN ) return LIT64( 0xFFFFFFFFFFFFFFFF );
431#else
432    if ( zx.isZero ) {
433        z.low = 0;
434        z.high = zx.sign ? 0x80000000 : 0;
435        return z;
436    }
437    if ( zx.isInf ) {
438        z.low = 0;
439        z.high = zx.sign ? 0xFFF00000 : 0x7FF00000;
440        return z;
441    }
442    if ( zx.isNaN ) {
443        z.high = z.low = 0xFFFFFFFF;
444        return z;
445    }
446#endif
447    while ( 0x01000000 <= zx.sig.a0 ) {
448        zx.sig = shortShift64RightJamming( zx.sig, 1 );
449        ++zx.exp;
450    }
451    while ( zx.sig.a0 < 0x00800000 ) {
452        zx.sig = shortShift64Left( zx.sig, 1 );
453        --zx.exp;
454    }
455    savedZ = zx;
456    isTiny =
457           ( slow_float_detect_tininess == float_tininess_before_rounding )
458        && ( zx.exp + 0x3FF <= 0 );
459    zx = roundFloatXTo53( isTiny, zx );
460    expField = zx.exp + 0x3FF;
461    if ( 0x7FF <= expField ) {
462        slow_float_exception_flags |=
463            float_flag_overflow | float_flag_inexact;
464#ifdef BITS64
465        if ( zx.sign ) {
466            switch ( slow_float_rounding_mode ) {
467             case float_round_nearest_even:
468             case float_round_down:
469                z = LIT64( 0xFFF0000000000000 );
470                break;
471             case float_round_to_zero:
472             case float_round_up:
473                z = LIT64( 0xFFEFFFFFFFFFFFFF );
474                break;
475            }
476        }
477        else {
478            switch ( slow_float_rounding_mode ) {
479             case float_round_nearest_even:
480             case float_round_up:
481                z = LIT64( 0x7FF0000000000000 );
482                break;
483             case float_round_to_zero:
484             case float_round_down:
485                z = LIT64( 0x7FEFFFFFFFFFFFFF );
486                break;
487            }
488        }
489#else
490        if ( zx.sign ) {
491            switch ( slow_float_rounding_mode ) {
492             case float_round_nearest_even:
493             case float_round_down:
494                z.low = 0;
495                z.high = 0xFFF00000;
496                break;
497             case float_round_to_zero:
498             case float_round_up:
499                z.low = 0xFFFFFFFF;
500                z.high = 0xFFEFFFFF;
501                break;
502            }
503        }
504        else {
505            switch ( slow_float_rounding_mode ) {
506             case float_round_nearest_even:
507             case float_round_up:
508                z.low = 0;
509                z.high = 0x7FF00000;
510                break;
511             case float_round_to_zero:
512             case float_round_down:
513                z.low = 0xFFFFFFFF;
514                z.high = 0x7FEFFFFF;
515                break;
516            }
517        }
518#endif
519        return z;
520    }
521    if ( expField <= 0 ) {
522        isTiny = TRUE;
523        zx = savedZ;
524        expField = zx.exp + 0x3FF;
525        if ( expField < -56 ) {
526            zx.sig.a1 = ( zx.sig.a0 != 0 ) || ( zx.sig.a1 != 0 );
527            zx.sig.a0 = 0;
528        }
529        else {
530            while ( expField <= 0 ) {
531                zx.sig = shortShift64RightJamming( zx.sig, 1 );
532                ++expField;
533            }
534        }
535        zx = roundFloatXTo53( isTiny, zx );
536        expField = ( 0x00800000 <= zx.sig.a0 ) ? 1 : 0;
537    }
538    zx.sig = shortShift64RightJamming( zx.sig, 3 );
539#ifdef BITS64
540    z = expField;
541    z <<= 52;
542    if ( zx.sign ) z |= LIT64( 0x8000000000000000 );
543    z |= ( ( (bits64) ( zx.sig.a0 & 0x000FFFFF ) )<<32 ) | zx.sig.a1;
544#else
545    z.low = zx.sig.a1;
546    z.high = expField;
547    z.high <<= 52 - 32;
548    if ( zx.sign ) z.high |= 0x80000000;
549    z.high |= zx.sig.a0 & 0x000FFFFF;
550#endif
551    return z;
552
553}
554
555static floatX floatXInvalid( void )
556{
557
558    slow_float_exception_flags |= float_flag_invalid;
559    return floatXNaN;
560
561}
562
563static floatX floatXRoundToInt( floatX ax )
564{
565    int16 shiftCount, i;
566
567    if ( ax.isNaN || ax.isInf ) return ax;
568    shiftCount = 52 - ax.exp;
569    if ( shiftCount <= 0 ) return ax;
570    if ( 55 < shiftCount ) {
571        ax.exp = 52;
572        ax.sig.a1 = ! ax.isZero;
573        ax.sig.a0 = 0;
574    }
575    else {
576        while ( 0 < shiftCount ) {
577            ax.sig = shortShift64RightJamming( ax.sig, 1 );
578            ++ax.exp;
579            --shiftCount;
580        }
581    }
582    ax = roundFloatXTo53( FALSE, ax );
583    if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) ax.isZero = TRUE;
584    return ax;
585
586}
587
588static floatX floatXAdd( floatX ax, floatX bx )
589{
590    int16 expDiff;
591    floatX zx;
592
593    if ( ax.isNaN ) return ax;
594    if ( bx.isNaN ) return bx;
595    if ( ax.isInf && bx.isInf ) {
596        if ( ax.sign == bx.sign ) return ax;
597        return floatXInvalid();
598    }
599    if ( ax.isInf ) return ax;
600    if ( bx.isInf ) return bx;
601    if ( ax.isZero && bx.isZero ) {
602        if ( ax.sign == bx.sign ) return ax;
603        goto completeCancellation;
604    }
605    if (    ( ax.sign != bx.sign )
606         && ( ax.exp == bx.exp )
607         && eq64( ax.sig, bx.sig )
608       ) {
609 completeCancellation:
610        return
611              ( slow_float_rounding_mode == float_round_down ) ?
612                  floatXNegativeZero
613            : floatXPositiveZero;
614    }
615    if ( ax.isZero ) return bx;
616    if ( bx.isZero ) return ax;
617    expDiff = ax.exp - bx.exp;
618    if ( expDiff < 0 ) {
619        zx = ax;
620        zx.exp = bx.exp;
621        if ( expDiff < -56 ) {
622            zx.sig.a1 = 1;
623            zx.sig.a0 = 0;
624        }
625        else {
626            while ( expDiff < 0 ) {
627                zx.sig = shortShift64RightJamming( zx.sig, 1 );
628                ++expDiff;
629            }
630        }
631        if ( ax.sign != bx.sign ) zx.sig = neg64( zx.sig );
632        zx.sign = bx.sign;
633        zx.sig = add64( zx.sig, bx.sig );
634    }
635    else {
636        zx = bx;
637        zx.exp = ax.exp;
638        if ( 56 < expDiff ) {
639            zx.sig.a1 = 1;
640            zx.sig.a0 = 0;
641        }
642        else {
643            while ( 0 < expDiff ) {
644                zx.sig = shortShift64RightJamming( zx.sig, 1 );
645                --expDiff;
646            }
647        }
648        if ( ax.sign != bx.sign ) zx.sig = neg64( zx.sig );
649        zx.sign = ax.sign;
650        zx.sig = add64( zx.sig, ax.sig );
651    }
652    if ( zx.sig.a0 & 0x80000000 ) {
653        zx.sig = neg64( zx.sig );
654        zx.sign = ! zx.sign;
655    }
656    return zx;
657
658}
659
660static floatX floatXMul( floatX ax, floatX bx )
661{
662    int8 bitNum;
663    floatX zx;
664
665    if ( ax.isNaN ) return ax;
666    if ( bx.isNaN ) return bx;
667    if ( ax.isInf ) {
668        if ( bx.isZero ) return floatXInvalid();
669        if ( bx.sign ) ax.sign = ! ax.sign;
670        return ax;
671    }
672    if ( bx.isInf ) {
673        if ( ax.isZero ) return floatXInvalid();
674        if ( ax.sign ) bx.sign = ! bx.sign;
675        return bx;
676    }
677    zx = ax;
678    zx.sign ^= bx.sign;
679    if ( ax.isZero || bx.isZero ) {
680        return zx.sign ? floatXNegativeZero : floatXPositiveZero;
681    }
682    zx.exp += bx.exp + 1;
683    zx.sig.a1 = 0;
684    zx.sig.a0 = 0;
685    for ( bitNum = 0; bitNum < 55; ++bitNum ) {
686        if ( bx.sig.a1 & 2 ) zx.sig = add64( zx.sig, ax.sig );
687        bx.sig = shortShift64RightJamming( bx.sig, 1 );
688        zx.sig = shortShift64RightJamming( zx.sig, 1 );
689    }
690    return zx;
691
692}
693
694static floatX floatXDiv( floatX ax, floatX bx )
695{
696    bits64X negBSig;
697    int8 bitNum;
698    floatX zx;
699
700    if ( ax.isNaN ) return ax;
701    if ( bx.isNaN ) return bx;
702    if ( ax.isInf ) {
703        if ( bx.isInf ) return floatXInvalid();
704        if ( bx.sign ) ax.sign = ! ax.sign;
705        return ax;
706    }
707    if ( bx.isZero ) {
708        if ( ax.isZero ) return floatXInvalid();
709        slow_float_exception_flags |= float_flag_divbyzero;
710        if ( ax.sign ) bx.sign = ! bx.sign;
711        bx.isZero = FALSE;
712        bx.isInf = TRUE;
713        return bx;
714    }
715    zx = ax;
716    zx.sign ^= bx.sign;
717    if ( ax.isZero || bx.isInf ) {
718        return zx.sign ? floatXNegativeZero : floatXPositiveZero;
719    }
720    zx.exp -= bx.exp + 1;
721    zx.sig.a1 = 0;
722    zx.sig.a0 = 0;
723    negBSig = neg64( bx.sig );
724    for ( bitNum = 0; bitNum < 56; ++bitNum ) {
725        if ( le64( bx.sig, ax.sig ) ) {
726            zx.sig.a1 |= 1;
727            ax.sig = add64( ax.sig, negBSig );
728        }
729        ax.sig = shortShift64Left( ax.sig, 1 );
730        zx.sig = shortShift64Left( zx.sig, 1 );
731    }
732    if ( ax.sig.a0 || ax.sig.a1 ) zx.sig.a1 |= 1;
733    return zx;
734
735}
736
737static floatX floatXRem( floatX ax, floatX bx )
738{
739    bits64X negBSig;
740    flag lastQuotientBit;
741    bits64X savedASig;
742
743    if ( ax.isNaN ) return ax;
744    if ( bx.isNaN ) return bx;
745    if ( ax.isInf || bx.isZero ) return floatXInvalid();
746    if ( ax.isZero || bx.isInf ) return ax;
747    --bx.exp;
748    if ( ax.exp < bx.exp ) return ax;
749    bx.sig = shortShift64Left( bx.sig, 1 );
750    negBSig = neg64( bx.sig );
751    while ( bx.exp < ax.exp ) {
752        if ( le64( bx.sig, ax.sig ) ) ax.sig = add64( ax.sig, negBSig );
753        ax.sig = shortShift64Left( ax.sig, 1 );
754        --ax.exp;
755    }
756    lastQuotientBit = le64( bx.sig, ax.sig );
757    if ( lastQuotientBit ) ax.sig = add64( ax.sig, negBSig );
758    savedASig = ax.sig;
759    ax.sig = neg64( add64( ax.sig, negBSig ) );
760    if ( lt64( ax.sig, savedASig ) ) {
761        ax.sign = ! ax.sign;
762    }
763    else if ( lt64( savedASig, ax.sig ) ) {
764        ax.sig = savedASig;
765    }
766    else {
767        if ( lastQuotientBit ) {
768            ax.sign = ! ax.sign;
769        }
770        else {
771            ax.sig = savedASig;
772        }
773    }
774    if ( ( ax.sig.a0 == 0 ) && ( ax.sig.a1 == 0 ) ) ax.isZero = TRUE;
775    return ax;
776
777}
778
779static floatX floatXSqrt( floatX ax )
780{
781    int8 bitNum;
782    bits64X bitSig, savedASig;
783    floatX zx;
784
785    if ( ax.isNaN || ax.isZero ) return ax;
786    if ( ax.sign ) return floatXInvalid();
787    if ( ax.isInf ) return ax;
788    zx = ax;
789    zx.exp >>= 1;
790    if ( ( ax.exp & 1 ) == 0 ) ax.sig = shortShift64RightJamming( ax.sig, 1 );
791    zx.sig.a1 = 0;
792    zx.sig.a0 = 0;
793    bitSig.a1 = 0;
794    bitSig.a0 = 0x00800000;
795    for ( bitNum = 0; bitNum < 56; ++bitNum ) {
796        savedASig = ax.sig;
797        ax.sig = add64( ax.sig, neg64( zx.sig ) );
798        ax.sig = shortShift64Left( ax.sig, 1 );
799        ax.sig = add64( ax.sig, neg64( bitSig ) );
800        if ( ax.sig.a0 & 0x80000000 ) {
801            ax.sig = shortShift64Left( savedASig, 1 );
802        }
803        else {
804            zx.sig.a1 |= bitSig.a1;
805            zx.sig.a0 |= bitSig.a0;
806        }
807        bitSig = shortShift64RightJamming( bitSig, 1 );
808    }
809    if ( ax.sig.a0 || ax.sig.a1 ) zx.sig.a1 |= 1;
810    return zx;
811
812}
813
814static flag floatXEq( floatX ax, floatX bx )
815{
816
817    if ( ax.isNaN || bx.isNaN ) return FALSE;
818    if ( ax.isZero && bx.isZero ) return TRUE;
819    if ( ax.sign != bx.sign ) return FALSE;
820    if ( ax.isInf || bx.isInf ) return ax.isInf && bx.isInf;
821    return ( ax.exp == bx.exp ) && eq64( ax.sig, bx.sig );
822
823}
824
825static flag floatXLe( floatX ax, floatX bx )
826{
827
828    if ( ax.isNaN || bx.isNaN ) return FALSE;
829    if ( ax.isZero && bx.isZero ) return TRUE;
830    if ( ax.sign != bx.sign ) return ax.sign;
831    if ( ax.sign ) {
832        if ( ax.isInf || bx.isZero ) return TRUE;
833        if ( bx.isInf || ax.isZero ) return FALSE;
834        if ( bx.exp < ax.exp ) return TRUE;
835        if ( ax.exp < bx.exp ) return FALSE;
836        return le64( bx.sig, ax.sig );
837    }
838    else {
839        if ( bx.isInf || ax.isZero ) return TRUE;
840        if ( ax.isInf || bx.isZero ) return FALSE;
841        if ( ax.exp < bx.exp ) return TRUE;
842        if ( bx.exp < ax.exp ) return FALSE;
843        return le64( ax.sig, bx.sig );
844    }
845
846}
847
848static flag floatXLt( floatX ax, floatX bx )
849{
850
851    if ( ax.isNaN || bx.isNaN ) return FALSE;
852    if ( ax.isZero && bx.isZero ) return FALSE;
853    if ( ax.sign != bx.sign ) return ax.sign;
854    if ( ax.isInf && bx.isInf ) return FALSE;
855    if ( ax.sign ) {
856        if ( ax.isInf || bx.isZero ) return TRUE;
857        if ( bx.isInf || ax.isZero ) return FALSE;
858        if ( bx.exp < ax.exp ) return TRUE;
859        if ( ax.exp < bx.exp ) return FALSE;
860        return lt64( bx.sig, ax.sig );
861    }
862    else {
863        if ( bx.isInf || ax.isZero ) return TRUE;
864        if ( ax.isInf || bx.isZero ) return FALSE;
865        if ( ax.exp < bx.exp ) return TRUE;
866        if ( bx.exp < ax.exp ) return FALSE;
867        return lt64( ax.sig, bx.sig );
868    }
869
870}
871
872float32 slow_int32_to_float32( int32 a )
873{
874
875    return floatXToFloat32( int32ToFloatX( a ) );
876
877}
878
879float64 slow_int32_to_float64( int32 a )
880{
881
882    return floatXToFloat64( int32ToFloatX( a ) );
883
884}
885
886int32 slow_float32_to_int32( float32 a )
887{
888
889    return floatXToInt32( float32ToFloatX( a ) );
890
891}
892
893int32 slow_float32_to_int32_round_to_zero( float32 a )
894{
895    int8 savedRoundingMode;
896    int32 z;
897
898    savedRoundingMode = slow_float_rounding_mode;
899    slow_float_rounding_mode = float_round_to_zero;
900    z = floatXToInt32( float32ToFloatX( a ) );
901    slow_float_rounding_mode = savedRoundingMode;
902    return z;
903
904}
905
906float64 slow_float32_to_float64( float32 a )
907{
908
909    return floatXToFloat64( float32ToFloatX( a ) );
910
911}
912
913float32 slow_float32_round_to_int( float32 a )
914{
915
916    return floatXToFloat32( floatXRoundToInt( float32ToFloatX( a ) ) );
917
918}
919
920float32 slow_float32_add( float32 a, float32 b )
921{
922
923    return
924        floatXToFloat32(
925            floatXAdd( float32ToFloatX( a ), float32ToFloatX( b ) ) );
926
927}
928
929float32 slow_float32_sub( float32 a, float32 b )
930{
931
932    b ^= 0x80000000;
933    return
934        floatXToFloat32(
935            floatXAdd( float32ToFloatX( a ), float32ToFloatX( b ) ) );
936
937}
938
939float32 slow_float32_mul( float32 a, float32 b )
940{
941
942    return
943        floatXToFloat32(
944            floatXMul( float32ToFloatX( a ), float32ToFloatX( b ) ) );
945
946}
947
948float32 slow_float32_div( float32 a, float32 b )
949{
950
951    return
952        floatXToFloat32(
953            floatXDiv( float32ToFloatX( a ), float32ToFloatX( b ) ) );
954
955}
956
957float32 slow_float32_rem( float32 a, float32 b )
958{
959
960    return
961        floatXToFloat32(
962            floatXRem( float32ToFloatX( a ), float32ToFloatX( b ) ) );
963
964}
965
966float32 slow_float32_sqrt( float32 a )
967{
968
969    return floatXToFloat32( floatXSqrt( float32ToFloatX( a ) ) );
970
971}
972
973flag slow_float32_eq( float32 a, float32 b )
974{
975
976    return floatXEq( float32ToFloatX( a ), float32ToFloatX( b ) );
977
978}
979
980flag slow_float32_le( float32 a, float32 b )
981{
982    floatX ax, bx;
983
984    ax = float32ToFloatX( a );
985    bx = float32ToFloatX( b );
986    if ( ax.isNaN || bx.isNaN ) {
987        slow_float_exception_flags |= float_flag_invalid;
988    }
989    return floatXLe( ax, bx );
990
991}
992
993flag slow_float32_lt( float32 a, float32 b )
994{
995    floatX ax, bx;
996
997    ax = float32ToFloatX( a );
998    bx = float32ToFloatX( b );
999    if ( ax.isNaN || bx.isNaN ) {
1000        slow_float_exception_flags |= float_flag_invalid;
1001    }
1002    return floatXLt( ax, bx );
1003
1004}
1005
1006flag slow_float32_eq_signaling( float32 a, float32 b )
1007{
1008    floatX ax, bx;
1009
1010    ax = float32ToFloatX( a );
1011    bx = float32ToFloatX( b );
1012    if ( ax.isNaN || bx.isNaN ) {
1013        slow_float_exception_flags |= float_flag_invalid;
1014    }
1015    return floatXEq( ax, bx );
1016
1017}
1018
1019flag slow_float32_le_quiet( float32 a, float32 b )
1020{
1021
1022    return floatXLe( float32ToFloatX( a ), float32ToFloatX( b ) );
1023
1024}
1025
1026flag slow_float32_lt_quiet( float32 a, float32 b )
1027{
1028
1029    return floatXLt( float32ToFloatX( a ), float32ToFloatX( b ) );
1030
1031}
1032
1033int32 slow_float64_to_int32( float64 a )
1034{
1035
1036    return floatXToInt32( float64ToFloatX( a ) );
1037
1038}
1039
1040int32 slow_float64_to_int32_round_to_zero( float64 a )
1041{
1042    int8 savedRoundingMode;
1043    int32 z;
1044
1045    savedRoundingMode = slow_float_rounding_mode;
1046    slow_float_rounding_mode = float_round_to_zero;
1047    z = floatXToInt32( float64ToFloatX( a ) );
1048    slow_float_rounding_mode = savedRoundingMode;
1049    return z;
1050
1051}
1052
1053float32 slow_float64_to_float32( float64 a )
1054{
1055
1056    return floatXToFloat32( float64ToFloatX( a ) );
1057
1058}
1059
1060float64 slow_float64_round_to_int( float64 a )
1061{
1062
1063    return floatXToFloat64( floatXRoundToInt( float64ToFloatX( a ) ) );
1064
1065}
1066
1067float64 slow_float64_add( float64 a, float64 b )
1068{
1069
1070    return
1071        floatXToFloat64(
1072            floatXAdd( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1073
1074}
1075
1076float64 slow_float64_sub( float64 a, float64 b )
1077{
1078
1079#ifdef BITS64
1080    b ^= LIT64( 0x8000000000000000 );
1081#else
1082    b.high ^= 0x80000000;
1083#endif
1084    return
1085        floatXToFloat64(
1086            floatXAdd( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1087
1088}
1089
1090float64 slow_float64_mul( float64 a, float64 b )
1091{
1092
1093    return
1094        floatXToFloat64(
1095            floatXMul( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1096
1097}
1098
1099float64 slow_float64_div( float64 a, float64 b )
1100{
1101
1102    return
1103        floatXToFloat64(
1104            floatXDiv( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1105
1106}
1107
1108float64 slow_float64_rem( float64 a, float64 b )
1109{
1110
1111    return
1112        floatXToFloat64(
1113            floatXRem( float64ToFloatX( a ), float64ToFloatX( b ) ) );
1114
1115}
1116
1117float64 slow_float64_sqrt( float64 a )
1118{
1119
1120    return floatXToFloat64( floatXSqrt( float64ToFloatX( a ) ) );
1121
1122}
1123
1124flag slow_float64_eq( float64 a, float64 b )
1125{
1126
1127    return floatXEq( float64ToFloatX( a ), float64ToFloatX( b ) );
1128
1129}
1130
1131flag slow_float64_le( float64 a, float64 b )
1132{
1133    floatX ax, bx;
1134
1135    ax = float64ToFloatX( a );
1136    bx = float64ToFloatX( b );
1137    if ( ax.isNaN || bx.isNaN ) {
1138        slow_float_exception_flags |= float_flag_invalid;
1139    }
1140    return floatXLe( ax, bx );
1141
1142}
1143
1144flag slow_float64_lt( float64 a, float64 b )
1145{
1146    floatX ax, bx;
1147
1148    ax = float64ToFloatX( a );
1149    bx = float64ToFloatX( b );
1150    if ( ax.isNaN || bx.isNaN ) {
1151        slow_float_exception_flags |= float_flag_invalid;
1152    }
1153    return floatXLt( ax, bx );
1154
1155}
1156
1157flag slow_float64_eq_signaling( float64 a, float64 b )
1158{
1159    floatX ax, bx;
1160
1161    ax = float64ToFloatX( a );
1162    bx = float64ToFloatX( b );
1163    if ( ax.isNaN || bx.isNaN ) {
1164        slow_float_exception_flags |= float_flag_invalid;
1165    }
1166    return floatXEq( ax, bx );
1167
1168}
1169
1170flag slow_float64_le_quiet( float64 a, float64 b )
1171{
1172
1173    return floatXLe( float64ToFloatX( a ), float64ToFloatX( b ) );
1174
1175}
1176
1177flag slow_float64_lt_quiet( float64 a, float64 b )
1178{
1179
1180    return floatXLt( float64ToFloatX( a ), float64ToFloatX( b ) );
1181
1182}
1183
1184