1230363Sdas/* $NetBSD: softfloat-macros,v 1.2 2009/02/16 10:23:35 tron Exp $ */
2129203Scognet/* $FreeBSD$ */
3129203Scognet
4129203Scognet/*
5129203Scognet===============================================================================
6129203Scognet
7129203ScognetThis C source fragment is part of the SoftFloat IEC/IEEE Floating-point
8129203ScognetArithmetic Package, Release 2a.
9129203Scognet
10129203ScognetWritten by John R. Hauser.  This work was made possible in part by the
11129203ScognetInternational Computer Science Institute, located at Suite 600, 1947 Center
12129203ScognetStreet, Berkeley, California 94704.  Funding was partially provided by the
13129203ScognetNational Science Foundation under grant MIP-9311980.  The original version
14129203Scognetof this code was written as part of a project to build a fixed-point vector
15129203Scognetprocessor in collaboration with the University of California at Berkeley,
16129203Scognetoverseen by Profs. Nelson Morgan and John Wawrzynek.  More information
17129203Scognetis available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
18129203Scognetarithmetic/SoftFloat.html'.
19129203Scognet
20129203ScognetTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
21129203Scognethas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
22129203ScognetTIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
23129203ScognetPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
24129203ScognetAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
25129203Scognet
26129203ScognetDerivative works are acceptable, even for commercial purposes, so long as
27129203Scognet(1) they include prominent notice that the work is derivative, and (2) they
28129203Scognetinclude prominent notice akin to these four paragraphs for those parts of
29129203Scognetthis code that are retained.
30129203Scognet
31129203Scognet===============================================================================
32129203Scognet*/
33129203Scognet
34129203Scognet/*
35129203Scognet-------------------------------------------------------------------------------
36129203ScognetShifts `a' right by the number of bits given in `count'.  If any nonzero
37129203Scognetbits are shifted off, they are ``jammed'' into the least significant bit of
38129203Scognetthe result by setting the least significant bit to 1.  The value of `count'
39129203Scognetcan be arbitrarily large; in particular, if `count' is greater than 32, the
40129203Scognetresult will be either 0 or 1, depending on whether `a' is zero or nonzero.
41129203ScognetThe result is stored in the location pointed to by `zPtr'.
42129203Scognet-------------------------------------------------------------------------------
43129203Scognet*/
44129203ScognetINLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr )
45129203Scognet{
46129203Scognet    bits32 z;
47129203Scognet
48129203Scognet    if ( count == 0 ) {
49129203Scognet        z = a;
50129203Scognet    }
51129203Scognet    else if ( count < 32 ) {
52129203Scognet        z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
53129203Scognet    }
54129203Scognet    else {
55129203Scognet        z = ( a != 0 );
56129203Scognet    }
57129203Scognet    *zPtr = z;
58129203Scognet
59129203Scognet}
60129203Scognet
61129203Scognet/*
62129203Scognet-------------------------------------------------------------------------------
63129203ScognetShifts `a' right by the number of bits given in `count'.  If any nonzero
64129203Scognetbits are shifted off, they are ``jammed'' into the least significant bit of
65129203Scognetthe result by setting the least significant bit to 1.  The value of `count'
66129203Scognetcan be arbitrarily large; in particular, if `count' is greater than 64, the
67129203Scognetresult will be either 0 or 1, depending on whether `a' is zero or nonzero.
68129203ScognetThe result is stored in the location pointed to by `zPtr'.
69129203Scognet-------------------------------------------------------------------------------
70129203Scognet*/
71129203ScognetINLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr )
72129203Scognet{
73129203Scognet    bits64 z;
74129203Scognet
75129203Scognet    if ( count == 0 ) {
76129203Scognet        z = a;
77129203Scognet    }
78129203Scognet    else if ( count < 64 ) {
79129203Scognet        z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
80129203Scognet    }
81129203Scognet    else {
82129203Scognet        z = ( a != 0 );
83129203Scognet    }
84129203Scognet    *zPtr = z;
85129203Scognet
86129203Scognet}
87129203Scognet
88129203Scognet/*
89129203Scognet-------------------------------------------------------------------------------
90129203ScognetShifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
91129203Scognet_plus_ the number of bits given in `count'.  The shifted result is at most
92129203Scognet64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
93129203Scognetbits shifted off form a second 64-bit result as follows:  The _last_ bit
94129203Scognetshifted off is the most-significant bit of the extra result, and the other
95129203Scognet63 bits of the extra result are all zero if and only if _all_but_the_last_
96129203Scognetbits shifted off were all zero.  This extra result is stored in the location
97129203Scognetpointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
98129203Scognet    (This routine makes more sense if `a0' and `a1' are considered to form a
99129203Scognetfixed-point value with binary point between `a0' and `a1'.  This fixed-point
100129203Scognetvalue is shifted right by the number of bits given in `count', and the
101129203Scognetinteger part of the result is returned at the location pointed to by
102129203Scognet`z0Ptr'.  The fractional part of the result may be slightly corrupted as
103129203Scognetdescribed above, and is returned at the location pointed to by `z1Ptr'.)
104129203Scognet-------------------------------------------------------------------------------
105129203Scognet*/
106129203ScognetINLINE void
107129203Scognet shift64ExtraRightJamming(
108129203Scognet     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
109129203Scognet{
110129203Scognet    bits64 z0, z1;
111129203Scognet    int8 negCount = ( - count ) & 63;
112129203Scognet
113129203Scognet    if ( count == 0 ) {
114129203Scognet        z1 = a1;
115129203Scognet        z0 = a0;
116129203Scognet    }
117129203Scognet    else if ( count < 64 ) {
118129203Scognet        z1 = ( a0<<negCount ) | ( a1 != 0 );
119129203Scognet        z0 = a0>>count;
120129203Scognet    }
121129203Scognet    else {
122129203Scognet        if ( count == 64 ) {
123129203Scognet            z1 = a0 | ( a1 != 0 );
124129203Scognet        }
125129203Scognet        else {
126129203Scognet            z1 = ( ( a0 | a1 ) != 0 );
127129203Scognet        }
128129203Scognet        z0 = 0;
129129203Scognet    }
130129203Scognet    *z1Ptr = z1;
131129203Scognet    *z0Ptr = z0;
132129203Scognet
133129203Scognet}
134129203Scognet
135129203Scognet/*
136129203Scognet-------------------------------------------------------------------------------
137129203ScognetShifts the 128-bit value formed by concatenating `a0' and `a1' right by the
138129203Scognetnumber of bits given in `count'.  Any bits shifted off are lost.  The value
139129203Scognetof `count' can be arbitrarily large; in particular, if `count' is greater
140129203Scognetthan 128, the result will be 0.  The result is broken into two 64-bit pieces
141129203Scognetwhich are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
142129203Scognet-------------------------------------------------------------------------------
143129203Scognet*/
144129203ScognetINLINE void
145129203Scognet shift128Right(
146129203Scognet     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
147129203Scognet{
148129203Scognet    bits64 z0, z1;
149129203Scognet    int8 negCount = ( - count ) & 63;
150129203Scognet
151129203Scognet    if ( count == 0 ) {
152129203Scognet        z1 = a1;
153129203Scognet        z0 = a0;
154129203Scognet    }
155129203Scognet    else if ( count < 64 ) {
156129203Scognet        z1 = ( a0<<negCount ) | ( a1>>count );
157129203Scognet        z0 = a0>>count;
158129203Scognet    }
159129203Scognet    else {
160129203Scognet        z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
161129203Scognet        z0 = 0;
162129203Scognet    }
163129203Scognet    *z1Ptr = z1;
164129203Scognet    *z0Ptr = z0;
165129203Scognet
166129203Scognet}
167129203Scognet
168129203Scognet/*
169129203Scognet-------------------------------------------------------------------------------
170129203ScognetShifts the 128-bit value formed by concatenating `a0' and `a1' right by the
171129203Scognetnumber of bits given in `count'.  If any nonzero bits are shifted off, they
172129203Scognetare ``jammed'' into the least significant bit of the result by setting the
173129203Scognetleast significant bit to 1.  The value of `count' can be arbitrarily large;
174129203Scognetin particular, if `count' is greater than 128, the result will be either
175129203Scognet0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
176129203Scognetnonzero.  The result is broken into two 64-bit pieces which are stored at
177129203Scognetthe locations pointed to by `z0Ptr' and `z1Ptr'.
178129203Scognet-------------------------------------------------------------------------------
179129203Scognet*/
180129203ScognetINLINE void
181129203Scognet shift128RightJamming(
182129203Scognet     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
183129203Scognet{
184129203Scognet    bits64 z0, z1;
185129203Scognet    int8 negCount = ( - count ) & 63;
186129203Scognet
187129203Scognet    if ( count == 0 ) {
188129203Scognet        z1 = a1;
189129203Scognet        z0 = a0;
190129203Scognet    }
191129203Scognet    else if ( count < 64 ) {
192129203Scognet        z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
193129203Scognet        z0 = a0>>count;
194129203Scognet    }
195129203Scognet    else {
196129203Scognet        if ( count == 64 ) {
197129203Scognet            z1 = a0 | ( a1 != 0 );
198129203Scognet        }
199129203Scognet        else if ( count < 128 ) {
200129203Scognet            z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
201129203Scognet        }
202129203Scognet        else {
203129203Scognet            z1 = ( ( a0 | a1 ) != 0 );
204129203Scognet        }
205129203Scognet        z0 = 0;
206129203Scognet    }
207129203Scognet    *z1Ptr = z1;
208129203Scognet    *z0Ptr = z0;
209129203Scognet
210129203Scognet}
211129203Scognet
212129203Scognet/*
213129203Scognet-------------------------------------------------------------------------------
214129203ScognetShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
215129203Scognetby 64 _plus_ the number of bits given in `count'.  The shifted result is
216129203Scognetat most 128 nonzero bits; these are broken into two 64-bit pieces which are
217129203Scognetstored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
218129203Scognetoff form a third 64-bit result as follows:  The _last_ bit shifted off is
219129203Scognetthe most-significant bit of the extra result, and the other 63 bits of the
220129203Scognetextra result are all zero if and only if _all_but_the_last_ bits shifted off
221129203Scognetwere all zero.  This extra result is stored in the location pointed to by
222129203Scognet`z2Ptr'.  The value of `count' can be arbitrarily large.
223129203Scognet    (This routine makes more sense if `a0', `a1', and `a2' are considered
224129203Scognetto form a fixed-point value with binary point between `a1' and `a2'.  This
225129203Scognetfixed-point value is shifted right by the number of bits given in `count',
226129203Scognetand the integer part of the result is returned at the locations pointed to
227129203Scognetby `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
228129203Scognetcorrupted as described above, and is returned at the location pointed to by
229129203Scognet`z2Ptr'.)
230129203Scognet-------------------------------------------------------------------------------
231129203Scognet*/
232129203ScognetINLINE void
233129203Scognet shift128ExtraRightJamming(
234129203Scognet     bits64 a0,
235129203Scognet     bits64 a1,
236129203Scognet     bits64 a2,
237129203Scognet     int16 count,
238129203Scognet     bits64 *z0Ptr,
239129203Scognet     bits64 *z1Ptr,
240129203Scognet     bits64 *z2Ptr
241129203Scognet )
242129203Scognet{
243129203Scognet    bits64 z0, z1, z2;
244129203Scognet    int8 negCount = ( - count ) & 63;
245129203Scognet
246129203Scognet    if ( count == 0 ) {
247129203Scognet        z2 = a2;
248129203Scognet        z1 = a1;
249129203Scognet        z0 = a0;
250129203Scognet    }
251129203Scognet    else {
252129203Scognet        if ( count < 64 ) {
253129203Scognet            z2 = a1<<negCount;
254129203Scognet            z1 = ( a0<<negCount ) | ( a1>>count );
255129203Scognet            z0 = a0>>count;
256129203Scognet        }
257129203Scognet        else {
258129203Scognet            if ( count == 64 ) {
259129203Scognet                z2 = a1;
260129203Scognet                z1 = a0;
261129203Scognet            }
262129203Scognet            else {
263129203Scognet                a2 |= a1;
264129203Scognet                if ( count < 128 ) {
265129203Scognet                    z2 = a0<<negCount;
266129203Scognet                    z1 = a0>>( count & 63 );
267129203Scognet                }
268129203Scognet                else {
269129203Scognet                    z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
270129203Scognet                    z1 = 0;
271129203Scognet                }
272129203Scognet            }
273129203Scognet            z0 = 0;
274129203Scognet        }
275129203Scognet        z2 |= ( a2 != 0 );
276129203Scognet    }
277129203Scognet    *z2Ptr = z2;
278129203Scognet    *z1Ptr = z1;
279129203Scognet    *z0Ptr = z0;
280129203Scognet
281129203Scognet}
282129203Scognet
283129203Scognet/*
284129203Scognet-------------------------------------------------------------------------------
285129203ScognetShifts the 128-bit value formed by concatenating `a0' and `a1' left by the
286129203Scognetnumber of bits given in `count'.  Any bits shifted off are lost.  The value
287129203Scognetof `count' must be less than 64.  The result is broken into two 64-bit
288129203Scognetpieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
289129203Scognet-------------------------------------------------------------------------------
290129203Scognet*/
291129203ScognetINLINE void
292129203Scognet shortShift128Left(
293129203Scognet     bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr )
294129203Scognet{
295129203Scognet
296129203Scognet    *z1Ptr = a1<<count;
297129203Scognet    *z0Ptr =
298129203Scognet        ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
299129203Scognet
300129203Scognet}
301129203Scognet
302129203Scognet/*
303129203Scognet-------------------------------------------------------------------------------
304129203ScognetShifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
305129203Scognetby the number of bits given in `count'.  Any bits shifted off are lost.
306129203ScognetThe value of `count' must be less than 64.  The result is broken into three
307129203Scognet64-bit pieces which are stored at the locations pointed to by `z0Ptr',
308129203Scognet`z1Ptr', and `z2Ptr'.
309129203Scognet-------------------------------------------------------------------------------
310129203Scognet*/
311129203ScognetINLINE void
312129203Scognet shortShift192Left(
313129203Scognet     bits64 a0,
314129203Scognet     bits64 a1,
315129203Scognet     bits64 a2,
316129203Scognet     int16 count,
317129203Scognet     bits64 *z0Ptr,
318129203Scognet     bits64 *z1Ptr,
319129203Scognet     bits64 *z2Ptr
320129203Scognet )
321129203Scognet{
322129203Scognet    bits64 z0, z1, z2;
323129203Scognet    int8 negCount;
324129203Scognet
325129203Scognet    z2 = a2<<count;
326129203Scognet    z1 = a1<<count;
327129203Scognet    z0 = a0<<count;
328129203Scognet    if ( 0 < count ) {
329129203Scognet        negCount = ( ( - count ) & 63 );
330129203Scognet        z1 |= a2>>negCount;
331129203Scognet        z0 |= a1>>negCount;
332129203Scognet    }
333129203Scognet    *z2Ptr = z2;
334129203Scognet    *z1Ptr = z1;
335129203Scognet    *z0Ptr = z0;
336129203Scognet
337129203Scognet}
338129203Scognet
339129203Scognet/*
340129203Scognet-------------------------------------------------------------------------------
341129203ScognetAdds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
342129203Scognetvalue formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
343129203Scognetany carry out is lost.  The result is broken into two 64-bit pieces which
344129203Scognetare stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
345129203Scognet-------------------------------------------------------------------------------
346129203Scognet*/
347129203ScognetINLINE void
348129203Scognet add128(
349129203Scognet     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
350129203Scognet{
351129203Scognet    bits64 z1;
352129203Scognet
353129203Scognet    z1 = a1 + b1;
354129203Scognet    *z1Ptr = z1;
355129203Scognet    *z0Ptr = a0 + b0 + ( z1 < a1 );
356129203Scognet
357129203Scognet}
358129203Scognet
359129203Scognet/*
360129203Scognet-------------------------------------------------------------------------------
361129203ScognetAdds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
362129203Scognet192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
363129203Scognetmodulo 2^192, so any carry out is lost.  The result is broken into three
364129203Scognet64-bit pieces which are stored at the locations pointed to by `z0Ptr',
365129203Scognet`z1Ptr', and `z2Ptr'.
366129203Scognet-------------------------------------------------------------------------------
367129203Scognet*/
368129203ScognetINLINE void
369129203Scognet add192(
370129203Scognet     bits64 a0,
371129203Scognet     bits64 a1,
372129203Scognet     bits64 a2,
373129203Scognet     bits64 b0,
374129203Scognet     bits64 b1,
375129203Scognet     bits64 b2,
376129203Scognet     bits64 *z0Ptr,
377129203Scognet     bits64 *z1Ptr,
378129203Scognet     bits64 *z2Ptr
379129203Scognet )
380129203Scognet{
381129203Scognet    bits64 z0, z1, z2;
382129203Scognet    int8 carry0, carry1;
383129203Scognet
384129203Scognet    z2 = a2 + b2;
385129203Scognet    carry1 = ( z2 < a2 );
386129203Scognet    z1 = a1 + b1;
387129203Scognet    carry0 = ( z1 < a1 );
388129203Scognet    z0 = a0 + b0;
389129203Scognet    z1 += carry1;
390230363Sdas    z0 += ( z1 < (bits64)carry1 );
391129203Scognet    z0 += carry0;
392129203Scognet    *z2Ptr = z2;
393129203Scognet    *z1Ptr = z1;
394129203Scognet    *z0Ptr = z0;
395129203Scognet
396129203Scognet}
397129203Scognet
398129203Scognet/*
399129203Scognet-------------------------------------------------------------------------------
400129203ScognetSubtracts the 128-bit value formed by concatenating `b0' and `b1' from the
401129203Scognet128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
402129203Scognet2^128, so any borrow out (carry out) is lost.  The result is broken into two
403129203Scognet64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
404129203Scognet`z1Ptr'.
405129203Scognet-------------------------------------------------------------------------------
406129203Scognet*/
407129203ScognetINLINE void
408129203Scognet sub128(
409129203Scognet     bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr )
410129203Scognet{
411129203Scognet
412129203Scognet    *z1Ptr = a1 - b1;
413129203Scognet    *z0Ptr = a0 - b0 - ( a1 < b1 );
414129203Scognet
415129203Scognet}
416129203Scognet
417129203Scognet/*
418129203Scognet-------------------------------------------------------------------------------
419129203ScognetSubtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
420129203Scognetfrom the 192-bit value formed by concatenating `a0', `a1', and `a2'.
421129203ScognetSubtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
422129203Scognetresult is broken into three 64-bit pieces which are stored at the locations
423129203Scognetpointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
424129203Scognet-------------------------------------------------------------------------------
425129203Scognet*/
426129203ScognetINLINE void
427129203Scognet sub192(
428129203Scognet     bits64 a0,
429129203Scognet     bits64 a1,
430129203Scognet     bits64 a2,
431129203Scognet     bits64 b0,
432129203Scognet     bits64 b1,
433129203Scognet     bits64 b2,
434129203Scognet     bits64 *z0Ptr,
435129203Scognet     bits64 *z1Ptr,
436129203Scognet     bits64 *z2Ptr
437129203Scognet )
438129203Scognet{
439129203Scognet    bits64 z0, z1, z2;
440129203Scognet    int8 borrow0, borrow1;
441129203Scognet
442129203Scognet    z2 = a2 - b2;
443129203Scognet    borrow1 = ( a2 < b2 );
444129203Scognet    z1 = a1 - b1;
445129203Scognet    borrow0 = ( a1 < b1 );
446129203Scognet    z0 = a0 - b0;
447230363Sdas    z0 -= ( z1 < (bits64)borrow1 );
448129203Scognet    z1 -= borrow1;
449129203Scognet    z0 -= borrow0;
450129203Scognet    *z2Ptr = z2;
451129203Scognet    *z1Ptr = z1;
452129203Scognet    *z0Ptr = z0;
453129203Scognet
454129203Scognet}
455129203Scognet
456129203Scognet/*
457129203Scognet-------------------------------------------------------------------------------
458129203ScognetMultiplies `a' by `b' to obtain a 128-bit product.  The product is broken
459129203Scognetinto two 64-bit pieces which are stored at the locations pointed to by
460129203Scognet`z0Ptr' and `z1Ptr'.
461129203Scognet-------------------------------------------------------------------------------
462129203Scognet*/
463129203ScognetINLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr )
464129203Scognet{
465129203Scognet    bits32 aHigh, aLow, bHigh, bLow;
466129203Scognet    bits64 z0, zMiddleA, zMiddleB, z1;
467129203Scognet
468129203Scognet    aLow = a;
469129203Scognet    aHigh = a>>32;
470129203Scognet    bLow = b;
471129203Scognet    bHigh = b>>32;
472129203Scognet    z1 = ( (bits64) aLow ) * bLow;
473129203Scognet    zMiddleA = ( (bits64) aLow ) * bHigh;
474129203Scognet    zMiddleB = ( (bits64) aHigh ) * bLow;
475129203Scognet    z0 = ( (bits64) aHigh ) * bHigh;
476129203Scognet    zMiddleA += zMiddleB;
477129203Scognet    z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
478129203Scognet    zMiddleA <<= 32;
479129203Scognet    z1 += zMiddleA;
480129203Scognet    z0 += ( z1 < zMiddleA );
481129203Scognet    *z1Ptr = z1;
482129203Scognet    *z0Ptr = z0;
483129203Scognet
484129203Scognet}
485129203Scognet
486129203Scognet/*
487129203Scognet-------------------------------------------------------------------------------
488129203ScognetMultiplies the 128-bit value formed by concatenating `a0' and `a1' by
489129203Scognet`b' to obtain a 192-bit product.  The product is broken into three 64-bit
490129203Scognetpieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
491129203Scognet`z2Ptr'.
492129203Scognet-------------------------------------------------------------------------------
493129203Scognet*/
494129203ScognetINLINE void
495129203Scognet mul128By64To192(
496129203Scognet     bits64 a0,
497129203Scognet     bits64 a1,
498129203Scognet     bits64 b,
499129203Scognet     bits64 *z0Ptr,
500129203Scognet     bits64 *z1Ptr,
501129203Scognet     bits64 *z2Ptr
502129203Scognet )
503129203Scognet{
504129203Scognet    bits64 z0, z1, z2, more1;
505129203Scognet
506129203Scognet    mul64To128( a1, b, &z1, &z2 );
507129203Scognet    mul64To128( a0, b, &z0, &more1 );
508129203Scognet    add128( z0, more1, 0, z1, &z0, &z1 );
509129203Scognet    *z2Ptr = z2;
510129203Scognet    *z1Ptr = z1;
511129203Scognet    *z0Ptr = z0;
512129203Scognet
513129203Scognet}
514129203Scognet
515129203Scognet/*
516129203Scognet-------------------------------------------------------------------------------
517129203ScognetMultiplies the 128-bit value formed by concatenating `a0' and `a1' to the
518129203Scognet128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
519129203Scognetproduct.  The product is broken into four 64-bit pieces which are stored at
520129203Scognetthe locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
521129203Scognet-------------------------------------------------------------------------------
522129203Scognet*/
523129203ScognetINLINE void
524129203Scognet mul128To256(
525129203Scognet     bits64 a0,
526129203Scognet     bits64 a1,
527129203Scognet     bits64 b0,
528129203Scognet     bits64 b1,
529129203Scognet     bits64 *z0Ptr,
530129203Scognet     bits64 *z1Ptr,
531129203Scognet     bits64 *z2Ptr,
532129203Scognet     bits64 *z3Ptr
533129203Scognet )
534129203Scognet{
535129203Scognet    bits64 z0, z1, z2, z3;
536129203Scognet    bits64 more1, more2;
537129203Scognet
538129203Scognet    mul64To128( a1, b1, &z2, &z3 );
539129203Scognet    mul64To128( a1, b0, &z1, &more2 );
540129203Scognet    add128( z1, more2, 0, z2, &z1, &z2 );
541129203Scognet    mul64To128( a0, b0, &z0, &more1 );
542129203Scognet    add128( z0, more1, 0, z1, &z0, &z1 );
543129203Scognet    mul64To128( a0, b1, &more1, &more2 );
544129203Scognet    add128( more1, more2, 0, z2, &more1, &z2 );
545129203Scognet    add128( z0, z1, 0, more1, &z0, &z1 );
546129203Scognet    *z3Ptr = z3;
547129203Scognet    *z2Ptr = z2;
548129203Scognet    *z1Ptr = z1;
549129203Scognet    *z0Ptr = z0;
550129203Scognet
551129203Scognet}
552129203Scognet
553129203Scognet/*
554129203Scognet-------------------------------------------------------------------------------
555129203ScognetReturns an approximation to the 64-bit integer quotient obtained by dividing
556129203Scognet`b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
557129203Scognetdivisor `b' must be at least 2^63.  If q is the exact quotient truncated
558129203Scognettoward zero, the approximation returned lies between q and q + 2 inclusive.
559129203ScognetIf the exact quotient q is larger than 64 bits, the maximum positive 64-bit
560129203Scognetunsigned integer is returned.
561129203Scognet-------------------------------------------------------------------------------
562129203Scognet*/
563129203Scognetstatic bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b )
564129203Scognet{
565129203Scognet    bits64 b0, b1;
566129203Scognet    bits64 rem0, rem1, term0, term1;
567129203Scognet    bits64 z;
568129203Scognet
569129203Scognet    if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
570129203Scognet    b0 = b>>32;
571129203Scognet    z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
572129203Scognet    mul64To128( b, z, &term0, &term1 );
573129203Scognet    sub128( a0, a1, term0, term1, &rem0, &rem1 );
574129203Scognet    while ( ( (sbits64) rem0 ) < 0 ) {
575129203Scognet        z -= LIT64( 0x100000000 );
576129203Scognet        b1 = b<<32;
577129203Scognet        add128( rem0, rem1, b0, b1, &rem0, &rem1 );
578129203Scognet    }
579129203Scognet    rem0 = ( rem0<<32 ) | ( rem1>>32 );
580129203Scognet    z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
581129203Scognet    return z;
582129203Scognet
583129203Scognet}
584129203Scognet
585129203Scognet#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
586129203Scognet/*
587129203Scognet-------------------------------------------------------------------------------
588129203ScognetReturns an approximation to the square root of the 32-bit significand given
589129203Scognetby `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
590129203Scognet`aExp' (the least significant bit) is 1, the integer returned approximates
591129203Scognet2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
592129203Scognetis 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
593129203Scognetcase, the approximation returned lies strictly within +/-2 of the exact
594129203Scognetvalue.
595129203Scognet-------------------------------------------------------------------------------
596129203Scognet*/
597129203Scognetstatic bits32 estimateSqrt32( int16 aExp, bits32 a )
598129203Scognet{
599129203Scognet    static const bits16 sqrtOddAdjustments[] = {
600129203Scognet        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
601129203Scognet        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
602129203Scognet    };
603129203Scognet    static const bits16 sqrtEvenAdjustments[] = {
604129203Scognet        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
605129203Scognet        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
606129203Scognet    };
607129203Scognet    int8 idx;
608129203Scognet    bits32 z;
609129203Scognet
610129203Scognet    idx = ( a>>27 ) & 15;
611129203Scognet    if ( aExp & 1 ) {
612129203Scognet        z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ idx ];
613129203Scognet        z = ( ( a / z )<<14 ) + ( z<<15 );
614129203Scognet        a >>= 1;
615129203Scognet    }
616129203Scognet    else {
617129203Scognet        z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ idx ];
618129203Scognet        z = a / z + z;
619129203Scognet        z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
620129203Scognet        if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 );
621129203Scognet    }
622129203Scognet    return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 );
623129203Scognet
624129203Scognet}
625129203Scognet#endif
626129203Scognet
627129203Scognet/*
628129203Scognet-------------------------------------------------------------------------------
629129203ScognetReturns the number of leading 0 bits before the most-significant 1 bit of
630129203Scognet`a'.  If `a' is zero, 32 is returned.
631129203Scognet-------------------------------------------------------------------------------
632129203Scognet*/
633129203Scognetstatic int8 countLeadingZeros32( bits32 a )
634129203Scognet{
635129203Scognet    static const int8 countLeadingZerosHigh[] = {
636129203Scognet        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
637129203Scognet        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
638129203Scognet        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
639129203Scognet        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
640129203Scognet        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
641129203Scognet        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
642129203Scognet        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
643129203Scognet        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
644129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
645129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
648129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
649129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
651129203Scognet        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
652129203Scognet    };
653129203Scognet    int8 shiftCount;
654129203Scognet
655129203Scognet    shiftCount = 0;
656129203Scognet    if ( a < 0x10000 ) {
657129203Scognet        shiftCount += 16;
658129203Scognet        a <<= 16;
659129203Scognet    }
660129203Scognet    if ( a < 0x1000000 ) {
661129203Scognet        shiftCount += 8;
662129203Scognet        a <<= 8;
663129203Scognet    }
664129203Scognet    shiftCount += countLeadingZerosHigh[ a>>24 ];
665129203Scognet    return shiftCount;
666129203Scognet
667129203Scognet}
668129203Scognet
669129203Scognet/*
670129203Scognet-------------------------------------------------------------------------------
671129203ScognetReturns the number of leading 0 bits before the most-significant 1 bit of
672129203Scognet`a'.  If `a' is zero, 64 is returned.
673129203Scognet-------------------------------------------------------------------------------
674129203Scognet*/
675129203Scognetstatic int8 countLeadingZeros64( bits64 a )
676129203Scognet{
677129203Scognet    int8 shiftCount;
678129203Scognet
679129203Scognet    shiftCount = 0;
680129203Scognet    if ( a < ( (bits64) 1 )<<32 ) {
681129203Scognet        shiftCount += 32;
682129203Scognet    }
683129203Scognet    else {
684129203Scognet        a >>= 32;
685129203Scognet    }
686129203Scognet    shiftCount += countLeadingZeros32( a );
687129203Scognet    return shiftCount;
688129203Scognet
689129203Scognet}
690129203Scognet
691129203Scognet/*
692129203Scognet-------------------------------------------------------------------------------
693129203ScognetReturns 1 if the 128-bit value formed by concatenating `a0' and `a1'
694129203Scognetis equal to the 128-bit value formed by concatenating `b0' and `b1'.
695129203ScognetOtherwise, returns 0.
696129203Scognet-------------------------------------------------------------------------------
697129203Scognet*/
698129203ScognetINLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
699129203Scognet{
700129203Scognet
701129203Scognet    return ( a0 == b0 ) && ( a1 == b1 );
702129203Scognet
703129203Scognet}
704129203Scognet
705129203Scognet/*
706129203Scognet-------------------------------------------------------------------------------
707129203ScognetReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
708129203Scognetthan or equal to the 128-bit value formed by concatenating `b0' and `b1'.
709129203ScognetOtherwise, returns 0.
710129203Scognet-------------------------------------------------------------------------------
711129203Scognet*/
712129203ScognetINLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
713129203Scognet{
714129203Scognet
715129203Scognet    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
716129203Scognet
717129203Scognet}
718129203Scognet
719129203Scognet/*
720129203Scognet-------------------------------------------------------------------------------
721129203ScognetReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
722129203Scognetthan the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
723129203Scognetreturns 0.
724129203Scognet-------------------------------------------------------------------------------
725129203Scognet*/
726129203ScognetINLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
727129203Scognet{
728129203Scognet
729129203Scognet    return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
730129203Scognet
731129203Scognet}
732129203Scognet
733129203Scognet/*
734129203Scognet-------------------------------------------------------------------------------
735129203ScognetReturns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
736129203Scognetnot equal to the 128-bit value formed by concatenating `b0' and `b1'.
737129203ScognetOtherwise, returns 0.
738129203Scognet-------------------------------------------------------------------------------
739129203Scognet*/
740129203ScognetINLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 )
741129203Scognet{
742129203Scognet
743129203Scognet    return ( a0 != b0 ) || ( a1 != b1 );
744129203Scognet
745129203Scognet}
746129203Scognet
747