
/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

37#include <stdbool.h>
38#include <stdint.h>
39#include "platform.h"
40#include "internals.h"
41#include "specialize.h"
42#include "softfloat.h"
43
44#ifdef SOFTFLOAT_FAST_INT64
45
46void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
47{
48
49    *zPtr = f128_sqrt( *aPtr );
50
51}
52
53#else
54
55void f128M_sqrt( const float128_t *aPtr, float128_t *zPtr )
56{
57    const uint32_t *aWPtr;
58    uint32_t *zWPtr;
59    uint32_t uiA96;
60    bool signA;
61    int32_t rawExpA;
62    uint32_t rem[6];
63    int32_t expA, expZ;
64    uint64_t rem64;
65    uint32_t sig32A, recipSqrt32, sig32Z, qs[3], q;
66    uint64_t sig64Z;
67    uint32_t term[5];
68    uint64_t x64;
69    uint32_t y[5], rem32;
70
71    /*------------------------------------------------------------------------
72    *------------------------------------------------------------------------*/
73    aWPtr = (const uint32_t *) aPtr;
74    zWPtr = (uint32_t *) zPtr;
75    /*------------------------------------------------------------------------
76    *------------------------------------------------------------------------*/
77    uiA96 = aWPtr[indexWordHi( 4 )];
78    signA = signF128UI96( uiA96 );
79    rawExpA  = expF128UI96( uiA96 );
80    /*------------------------------------------------------------------------
81    *------------------------------------------------------------------------*/
82    if ( rawExpA == 0x7FFF ) {
83        if (
84            fracF128UI96( uiA96 )
85                || (aWPtr[indexWord( 4, 2 )] | aWPtr[indexWord( 4, 1 )]
86                        | aWPtr[indexWord( 4, 0 )])
87        ) {
88            softfloat_propagateNaNF128M( aWPtr, 0, zWPtr );
89            return;
90        }
91        if ( ! signA ) goto copyA;
92        goto invalid;
93    }
94    /*------------------------------------------------------------------------
95    *------------------------------------------------------------------------*/
96    expA = softfloat_shiftNormSigF128M( aWPtr, 13 - (rawExpA & 1), rem );
97    if ( expA == -128 ) goto copyA;
98    if ( signA ) goto invalid;
99    /*------------------------------------------------------------------------
100    | (`sig32Z' is guaranteed to be a lower bound on the square root of
101    | `sig32A', which makes `sig32Z' also a lower bound on the square root of
102    | `sigA'.)
103    *------------------------------------------------------------------------*/
104    expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
105    expA &= 1;
106    rem64 = (uint64_t) rem[indexWord( 4, 3 )]<<32 | rem[indexWord( 4, 2 )];
107    if ( expA ) {
108        if ( ! rawExpA ) {
109            softfloat_shortShiftRight128M( rem, 1, rem );
110            rem64 >>= 1;
111        }
112        sig32A = rem64>>29;
113    } else {
114        sig32A = rem64>>30;
115    }
116    recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
117    sig32Z = ((uint64_t) sig32A * recipSqrt32)>>32;
118    if ( expA ) sig32Z >>= 1;
119    qs[2] = sig32Z;
120    rem64 -= (uint64_t) sig32Z * sig32Z;
121    rem[indexWord( 4, 3 )] = rem64>>32;
122    rem[indexWord( 4, 2 )] = rem64;
123    /*------------------------------------------------------------------------
124    *------------------------------------------------------------------------*/
125    q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
126    sig64Z = ((uint64_t) sig32Z<<32) + ((uint64_t) q<<3);
127    term[indexWord( 4, 3 )] = 0;
128    term[indexWord( 4, 0 )] = 0;
129    /*------------------------------------------------------------------------
130    | (Repeating this loop is a rare occurrence.)
131    *------------------------------------------------------------------------*/
132    for (;;) {
133        x64 = ((uint64_t) sig32Z<<32) + sig64Z;
134        term[indexWord( 4, 2 )] = x64>>32;
135        term[indexWord( 4, 1 )] = x64;
136        softfloat_remStep128MBy32( rem, 29, term, q, y );
137        rem32 = y[indexWord( 4, 3 )];
138        if ( ! (rem32 & 0x80000000) ) break;
139        --q;
140        sig64Z -= 1<<3;
141    }
142    qs[1] = q;
143    rem64 = (uint64_t) rem32<<32 | y[indexWord( 4, 2 )];
144    /*------------------------------------------------------------------------
145    *------------------------------------------------------------------------*/
146    q = ((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32;
147    if ( rem64>>34 ) q += recipSqrt32;
148    sig64Z <<= 1;
149    /*------------------------------------------------------------------------
150    | (Repeating this loop is a rare occurrence.)
151    *------------------------------------------------------------------------*/
152    for (;;) {
153        x64 = sig64Z + (q>>26);
154        term[indexWord( 4, 2 )] = x64>>32;
155        term[indexWord( 4, 1 )] = x64;
156        term[indexWord( 4, 0 )] = q<<6;
157        softfloat_remStep128MBy32(
158            y, 29, term, q, &rem[indexMultiwordHi( 6, 4 )] );
159        rem32 = rem[indexWordHi( 6 )];
160        if ( ! (rem32 & 0x80000000) ) break;
161        --q;
162    }
163    qs[0] = q;
164    rem64 = (uint64_t) rem32<<32 | rem[indexWord( 6, 4 )];
165    /*------------------------------------------------------------------------
166    *------------------------------------------------------------------------*/
167    q = (((uint32_t) (rem64>>2) * (uint64_t) recipSqrt32)>>32) + 2;
168    if ( rem64>>34 ) q += recipSqrt32;
169    x64 = (uint64_t) q<<27;
170    y[indexWord( 5, 0 )] = x64;
171    x64 = ((uint64_t) qs[0]<<24) + (x64>>32);
172    y[indexWord( 5, 1 )] = x64;
173    x64 = ((uint64_t) qs[1]<<21) + (x64>>32);
174    y[indexWord( 5, 2 )] = x64;
175    x64 = ((uint64_t) qs[2]<<18) + (x64>>32);
176    y[indexWord( 5, 3 )] = x64;
177    y[indexWord( 5, 4 )] = x64>>32;
178    /*------------------------------------------------------------------------
179    *------------------------------------------------------------------------*/
180    if ( (q & 0xF) <= 2 ) {
181        q &= ~3;
182        y[indexWordLo( 5 )] = q<<27;
183        term[indexWord( 5, 4 )] = 0;
184        term[indexWord( 5, 3 )] = 0;
185        term[indexWord( 5, 2 )] = 0;
186        term[indexWord( 5, 1 )] = q>>6;
187        term[indexWord( 5, 0 )] = q<<26;
188        softfloat_sub160M( y, term, term );
189        rem[indexWord( 6, 1 )] = 0;
190        rem[indexWord( 6, 0 )] = 0;
191        softfloat_remStep160MBy32(
192            &rem[indexMultiwordLo( 6, 5 )],
193            14,
194            term,
195            q,
196            &rem[indexMultiwordLo( 6, 5 )]
197        );
198        rem32 = rem[indexWord( 6, 4 )];
199        if ( rem32 & 0x80000000 ) {
200            softfloat_sub1X160M( y );
201        } else {
202            if (
203                rem32 || rem[indexWord( 6, 0 )] || rem[indexWord( 6, 1 )]
204                    || (rem[indexWord( 6, 3 )] | rem[indexWord( 6, 2 )])
205            ) {
206                y[indexWordLo( 5 )] |= 1;
207            }
208        }
209    }
210    softfloat_roundPackMToF128M( 0, expZ, y, zWPtr );
211    return;
212    /*------------------------------------------------------------------------
213    *------------------------------------------------------------------------*/
214 invalid:
215    softfloat_invalidF128M( zWPtr );
216    return;
217    /*------------------------------------------------------------------------
218    *------------------------------------------------------------------------*/
219 copyA:
220    zWPtr[indexWordHi( 4 )] = uiA96;
221    zWPtr[indexWord( 4, 2 )] = aWPtr[indexWord( 4, 2 )];
222    zWPtr[indexWord( 4, 1 )] = aWPtr[indexWord( 4, 1 )];
223    zWPtr[indexWord( 4, 0 )] = aWPtr[indexWord( 4, 0 )];
224
225}
226
227#endif
228
229