1/* Software floating-point emulation.
2   Basic four-word fraction declaration and manipulation.
3   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5   Contributed by Richard Henderson (rth@cygnus.com),
6		  Jakub Jelinek (jj@ultra.linux.cz),
7		  David S. Miller (davem@redhat.com) and
8		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10   The GNU C Library is free software; you can redistribute it and/or
11   modify it under the terms of the GNU Lesser General Public
12   License as published by the Free Software Foundation; either
13   version 2.1 of the License, or (at your option) any later version.
14
15   In addition to the permissions in the GNU Lesser General Public
16   License, the Free Software Foundation gives you unlimited
17   permission to link the compiled version of this file into
18   combinations with other programs, and to distribute those
19   combinations without any restriction coming from the use of this
20   file.  (The Lesser General Public License restrictions do apply in
21   other respects; for example, they cover modification of the file,
22   and distribution when not linked into a combine executable.)
23
24   The GNU C Library is distributed in the hope that it will be useful,
25   but WITHOUT ANY WARRANTY; without even the implied warranty of
26   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27   Lesser General Public License for more details.
28
29   You should have received a copy of the GNU Lesser General Public
30   License along with the GNU C Library; if not, write to the Free
31   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
32   MA 02110-1301, USA.  */
33
34#define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]
35#define _FP_FRAC_COPY_4(D,S)			\
36  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],	\
37   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
38#define _FP_FRAC_SET_4(X,I)	__FP_FRAC_SET_4(X, I)
39#define _FP_FRAC_HIGH_4(X)	(X##_f[3])
40#define _FP_FRAC_LOW_4(X)	(X##_f[0])
41#define _FP_FRAC_WORD_4(X,w)	(X##_f[w])
42
43#define _FP_FRAC_SLL_4(X,N)						\
44  do {									\
45    _FP_I_TYPE _up, _down, _skip, _i;					\
46    _skip = (N) / _FP_W_TYPE_SIZE;					\
47    _up = (N) % _FP_W_TYPE_SIZE;					\
48    _down = _FP_W_TYPE_SIZE - _up;					\
49    if (!_up)								\
50      for (_i = 3; _i >= _skip; --_i)					\
51	X##_f[_i] = X##_f[_i-_skip];					\
52    else								\
53      {									\
54	for (_i = 3; _i > _skip; --_i)					\
55	  X##_f[_i] = X##_f[_i-_skip] << _up				\
56		      | X##_f[_i-_skip-1] >> _down;			\
57	X##_f[_i--] = X##_f[0] << _up; 					\
58      }									\
59    for (; _i >= 0; --_i)						\
60      X##_f[_i] = 0;							\
61  } while (0)
62
63/* This one was broken too */
64#define _FP_FRAC_SRL_4(X,N)						\
65  do {									\
66    _FP_I_TYPE _up, _down, _skip, _i;					\
67    _skip = (N) / _FP_W_TYPE_SIZE;					\
68    _down = (N) % _FP_W_TYPE_SIZE;					\
69    _up = _FP_W_TYPE_SIZE - _down;					\
70    if (!_down)								\
71      for (_i = 0; _i <= 3-_skip; ++_i)					\
72	X##_f[_i] = X##_f[_i+_skip];					\
73    else								\
74      {									\
75	for (_i = 0; _i < 3-_skip; ++_i)				\
76	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
77		      | X##_f[_i+_skip+1] << _up;			\
78	X##_f[_i++] = X##_f[3] >> _down;				\
79      }									\
80    for (; _i < 4; ++_i)						\
81      X##_f[_i] = 0;							\
82  } while (0)
83
84
85/* Right shift with sticky-lsb.
86 * What this actually means is that we do a standard right-shift,
87 * but that if any of the bits that fall off the right hand side
88 * were one then we always set the LSbit.
89 */
90#define _FP_FRAC_SRST_4(X,S,N,size)			\
91  do {							\
92    _FP_I_TYPE _up, _down, _skip, _i;			\
93    _FP_W_TYPE _s;					\
94    _skip = (N) / _FP_W_TYPE_SIZE;			\
95    _down = (N) % _FP_W_TYPE_SIZE;			\
96    _up = _FP_W_TYPE_SIZE - _down;			\
97    for (_s = _i = 0; _i < _skip; ++_i)			\
98      _s |= X##_f[_i];					\
99    if (!_down)						\
100      for (_i = 0; _i <= 3-_skip; ++_i)			\
101	X##_f[_i] = X##_f[_i+_skip];			\
102    else						\
103      {							\
104	_s |= X##_f[_i] << _up;				\
105	for (_i = 0; _i < 3-_skip; ++_i)		\
106	  X##_f[_i] = X##_f[_i+_skip] >> _down		\
107		      | X##_f[_i+_skip+1] << _up;	\
108	X##_f[_i++] = X##_f[3] >> _down;		\
109      }							\
110    for (; _i < 4; ++_i)				\
111      X##_f[_i] = 0;					\
112    S = (_s != 0);					\
113  } while (0)
114
115#define _FP_FRAC_SRS_4(X,N,size)		\
116  do {						\
117    int _sticky;				\
118    _FP_FRAC_SRST_4(X, _sticky, N, size);	\
119    X##_f[0] |= _sticky;			\
120  } while (0)
121
122#define _FP_FRAC_ADD_4(R,X,Y)						\
123  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
124		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
125		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
126
127#define _FP_FRAC_SUB_4(R,X,Y)						\
128  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
129		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
130		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
131
132#define _FP_FRAC_DEC_4(X,Y)						\
133  __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
134		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
135
136#define _FP_FRAC_ADDI_4(X,I)						\
137  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
138
139#define _FP_ZEROFRAC_4  0,0,0,0
140#define _FP_MINFRAC_4   0,0,0,1
141#define _FP_MAXFRAC_4	(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
142
143#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
144#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
145#define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
146#define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
147
148#define _FP_FRAC_EQ_4(X,Y)				\
149 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]		\
150  && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
151
152#define _FP_FRAC_GT_4(X,Y)				\
153 (X##_f[3] > Y##_f[3] ||				\
154  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
155   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
156    (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])	\
157   ))							\
158  ))							\
159 )
160
161#define _FP_FRAC_GE_4(X,Y)				\
162 (X##_f[3] > Y##_f[3] ||				\
163  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
164   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
165    (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])	\
166   ))							\
167  ))							\
168 )
169
170
171#define _FP_FRAC_CLZ_4(R,X)		\
172  do {					\
173    if (X##_f[3])			\
174    {					\
175	__FP_CLZ(R,X##_f[3]);		\
176    }					\
177    else if (X##_f[2])			\
178    {					\
179	__FP_CLZ(R,X##_f[2]);		\
180	R += _FP_W_TYPE_SIZE;		\
181    }					\
182    else if (X##_f[1])			\
183    {					\
184	__FP_CLZ(R,X##_f[1]);		\
185	R += _FP_W_TYPE_SIZE*2;		\
186    }					\
187    else				\
188    {					\
189	__FP_CLZ(R,X##_f[0]);		\
190	R += _FP_W_TYPE_SIZE*3;		\
191    }					\
192  } while(0)
193
194
195#define _FP_UNPACK_RAW_4(fs, X, val)				\
196  do {								\
197    union _FP_UNION_##fs _flo; _flo.flt = (val);		\
198    X##_f[0] = _flo.bits.frac0;					\
199    X##_f[1] = _flo.bits.frac1;					\
200    X##_f[2] = _flo.bits.frac2;					\
201    X##_f[3] = _flo.bits.frac3;					\
202    X##_e  = _flo.bits.exp;					\
203    X##_s  = _flo.bits.sign;					\
204  } while (0)
205
206#define _FP_UNPACK_RAW_4_P(fs, X, val)				\
207  do {								\
208    union _FP_UNION_##fs *_flo =				\
209      (union _FP_UNION_##fs *)(val);				\
210								\
211    X##_f[0] = _flo->bits.frac0;				\
212    X##_f[1] = _flo->bits.frac1;				\
213    X##_f[2] = _flo->bits.frac2;				\
214    X##_f[3] = _flo->bits.frac3;				\
215    X##_e  = _flo->bits.exp;					\
216    X##_s  = _flo->bits.sign;					\
217  } while (0)
218
219#define _FP_PACK_RAW_4(fs, val, X)				\
220  do {								\
221    union _FP_UNION_##fs _flo;					\
222    _flo.bits.frac0 = X##_f[0];					\
223    _flo.bits.frac1 = X##_f[1];					\
224    _flo.bits.frac2 = X##_f[2];					\
225    _flo.bits.frac3 = X##_f[3];					\
226    _flo.bits.exp   = X##_e;					\
227    _flo.bits.sign  = X##_s;					\
228    (val) = _flo.flt;				   		\
229  } while (0)
230
231#define _FP_PACK_RAW_4_P(fs, val, X)				\
232  do {								\
233    union _FP_UNION_##fs *_flo =				\
234      (union _FP_UNION_##fs *)(val);				\
235								\
236    _flo->bits.frac0 = X##_f[0];				\
237    _flo->bits.frac1 = X##_f[1];				\
238    _flo->bits.frac2 = X##_f[2];				\
239    _flo->bits.frac3 = X##_f[3];				\
240    _flo->bits.exp   = X##_e;					\
241    _flo->bits.sign  = X##_s;					\
242  } while (0)
243
244/*
245 * Multiplication algorithms:
246 */
247
248/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
249
250#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)			    \
251  do {									    \
252    _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	    \
253    _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);	    \
254									    \
255    doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
256    doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);				    \
257    doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);				    \
258    doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);				    \
259    doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);				    \
260    doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);				    \
261    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
262		    _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,		    \
263		    0,0,_FP_FRAC_WORD_8(_z,1));				    \
264    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
265		    _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,		    \
266		    _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
267		    _FP_FRAC_WORD_8(_z,1));				    \
268    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
269		    _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,		    \
270		    0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));	    \
271    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
272		    _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,		    \
273		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
274		    _FP_FRAC_WORD_8(_z,2));				    \
275    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
276		    _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,		    \
277		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
278		    _FP_FRAC_WORD_8(_z,2));				    \
279    doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);				    \
280    doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);				    \
281    doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);				    \
282    doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);				    \
283    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
284		    _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,		    \
285		    0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));	    \
286    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
287		    _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,		    \
288		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
289		    _FP_FRAC_WORD_8(_z,3));				    \
290    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
291		    _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,		    \
292		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
293		    _FP_FRAC_WORD_8(_z,3));				    \
294    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
295		    _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,		    \
296		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
297		    _FP_FRAC_WORD_8(_z,3));				    \
298    doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);				    \
299    doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);				    \
300    doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);				    \
301    doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);				    \
302    doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);				    \
303    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
304		    _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,		    \
305		    0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));	    \
306    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
307		    _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,		    \
308		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
309		    _FP_FRAC_WORD_8(_z,4));				    \
310    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
311		    _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,		    \
312		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
313		    _FP_FRAC_WORD_8(_z,4));				    \
314    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
315		    _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,		    \
316		    0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));	    \
317    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
318		    _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,		    \
319		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
320		    _FP_FRAC_WORD_8(_z,5));				    \
321    doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);				    \
322    __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
323		    _b_f1,_b_f0,					    \
324		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));	    \
325									    \
326    /* Normalize since we know where the msb of the multiplicands	    \
327       were (bit B), we know that the msb of the of the product is	    \
328       at either 2B or 2B-1.  */					    \
329    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);			    \
330    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
331		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
332  } while (0)
333
334#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)				    \
335  do {									    \
336    _FP_FRAC_DECL_8(_z);						    \
337									    \
338    mpn_mul_n(_z_f, _x_f, _y_f, 4);					    \
339									    \
340    /* Normalize since we know where the msb of the multiplicands	    \
341       were (bit B), we know that the msb of the of the product is	    \
342       at either 2B or 2B-1.  */					    \
343    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);	 		    \
344    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
345		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
346  } while (0)
347
348/*
349 * Helper utility for _FP_DIV_MEAT_4_udiv:
350 * pppp = m * nnn
351 */
352#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)				    \
353  do {									    \
354    UWtype _t;								    \
355    umul_ppmm(p1,p0,m,n0);						    \
356    umul_ppmm(p2,_t,m,n1);						    \
357    __FP_FRAC_ADDI_2(p2,p1,_t);						    \
358    umul_ppmm(p3,_t,m,n2);						    \
359    __FP_FRAC_ADDI_2(p3,p2,_t);						    \
360  } while (0)
361
362/*
363 * Division algorithms:
364 */
365
366#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)				    \
367  do {									    \
368    int _i;								    \
369    _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);				    \
370    _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);					    \
371    if (_FP_FRAC_GT_4(X, Y))						    \
372      {									    \
373	_n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);			    \
374	_FP_FRAC_SRL_4(X, 1);						    \
375      }									    \
376    else								    \
377      R##_e--;								    \
378									    \
379    /* Normalize, i.e. make the most significant bit of the 		    \
380       denominator set. */						    \
381    _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);				    \
382									    \
383    for (_i = 3; ; _i--)						    \
384      {									    \
385        if (X##_f[3] == Y##_f[3])					    \
386          {								    \
387            /* This is a special case, not an optimization		    \
388               (X##_f[3]/Y##_f[3] would not fit into UWtype).		    \
389               As X## is guaranteed to be < Y,  R##_f[_i] can be either	    \
390               (UWtype)-1 or (UWtype)-2.  */				    \
391            R##_f[_i] = -1;						    \
392            if (!_i)							    \
393	      break;							    \
394            __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],	    \
395			    Y##_f[2], Y##_f[1], Y##_f[0], 0,		    \
396			    X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);	    \
397            _FP_FRAC_SUB_4(X, Y, X);					    \
398            if (X##_f[3] > Y##_f[3])					    \
399              {								    \
400                R##_f[_i] = -2;						    \
401                _FP_FRAC_ADD_4(X, Y, X);				    \
402              }								    \
403          }								    \
404        else								    \
405          {								    \
406            udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
407            umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],		    \
408			  R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);	    \
409            X##_f[2] = X##_f[1];					    \
410            X##_f[1] = X##_f[0];					    \
411            X##_f[0] = _n_f[_i];					    \
412            if (_FP_FRAC_GT_4(_m, X))					    \
413              {								    \
414                R##_f[_i]--;						    \
415                _FP_FRAC_ADD_4(X, Y, X);				    \
416                if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))	    \
417                  {							    \
418		    R##_f[_i]--;					    \
419		    _FP_FRAC_ADD_4(X, Y, X);				    \
420                  }							    \
421              }								    \
422            _FP_FRAC_DEC_4(X, _m);					    \
423            if (!_i)							    \
424	      {								    \
425		if (!_FP_FRAC_EQ_4(X, _m))				    \
426		  R##_f[0] |= _FP_WORK_STICKY;				    \
427		break;							    \
428	      }								    \
429          }								    \
430      }									    \
431  } while (0)
432
433
434/*
435 * Square root algorithms:
436 * We have just one right now, maybe Newton approximation
437 * should be added for those machines where division is fast.
438 */
439
440#define _FP_SQRT_MEAT_4(R, S, T, X, q)				\
441  do {								\
442    while (q)							\
443      {								\
444	T##_f[3] = S##_f[3] + q;				\
445	if (T##_f[3] <= X##_f[3])				\
446	  {							\
447	    S##_f[3] = T##_f[3] + q;				\
448	    X##_f[3] -= T##_f[3];				\
449	    R##_f[3] += q;					\
450	  }							\
451	_FP_FRAC_SLL_4(X, 1);					\
452	q >>= 1;						\
453      }								\
454    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
455    while (q)							\
456      {								\
457	T##_f[2] = S##_f[2] + q;				\
458	T##_f[3] = S##_f[3];					\
459	if (T##_f[3] < X##_f[3] || 				\
460	    (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))	\
461	  {							\
462	    S##_f[2] = T##_f[2] + q;				\
463	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
464	    __FP_FRAC_DEC_2(X##_f[3], X##_f[2],			\
465			    T##_f[3], T##_f[2]);		\
466	    R##_f[2] += q;					\
467	  }							\
468	_FP_FRAC_SLL_4(X, 1);					\
469	q >>= 1;						\
470      }								\
471    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
472    while (q)							\
473      {								\
474	T##_f[1] = S##_f[1] + q;				\
475	T##_f[2] = S##_f[2];					\
476	T##_f[3] = S##_f[3];					\
477	if (T##_f[3] < X##_f[3] || 				\
478	    (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||	\
479	     (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))	\
480	  {							\
481	    S##_f[1] = T##_f[1] + q;				\
482	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
483	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
484	    __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],	\
485	    		    T##_f[3], T##_f[2], T##_f[1]);	\
486	    R##_f[1] += q;					\
487	  }							\
488	_FP_FRAC_SLL_4(X, 1);					\
489	q >>= 1;						\
490      }								\
491    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
492    while (q != _FP_WORK_ROUND)					\
493      {								\
494	T##_f[0] = S##_f[0] + q;				\
495	T##_f[1] = S##_f[1];					\
496	T##_f[2] = S##_f[2];					\
497	T##_f[3] = S##_f[3];					\
498	if (_FP_FRAC_GE_4(X,T))					\
499	  {							\
500	    S##_f[0] = T##_f[0] + q;				\
501	    S##_f[1] += (T##_f[0] > S##_f[0]);			\
502	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
503	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
504	    _FP_FRAC_DEC_4(X, T);				\
505	    R##_f[0] += q;					\
506	  }							\
507	_FP_FRAC_SLL_4(X, 1);					\
508	q >>= 1;						\
509      }								\
510    if (!_FP_FRAC_ZEROP_4(X))					\
511      {								\
512	if (_FP_FRAC_GT_4(X,S))					\
513	  R##_f[0] |= _FP_WORK_ROUND;				\
514	R##_f[0] |= _FP_WORK_STICKY;				\
515      }								\
516  } while (0)
517
518
519/*
520 * Internals
521 */
522
523#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)					\
524  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
525
526#ifndef __FP_FRAC_ADD_3
527#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
528  do {								\
529    _FP_W_TYPE _c1, _c2;					\
530    r0 = x0 + y0;						\
531    _c1 = r0 < x0;						\
532    r1 = x1 + y1;						\
533    _c2 = r1 < x1;						\
534    r1 += _c1;							\
535    _c2 |= r1 < _c1;						\
536    r2 = x2 + y2 + _c2;						\
537  } while (0)
538#endif
539
540#ifndef __FP_FRAC_ADD_4
541#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
542  do {								\
543    _FP_W_TYPE _c1, _c2, _c3;					\
544    r0 = x0 + y0;						\
545    _c1 = r0 < x0;						\
546    r1 = x1 + y1;						\
547    _c2 = r1 < x1;						\
548    r1 += _c1;							\
549    _c2 |= r1 < _c1;						\
550    r2 = x2 + y2;						\
551    _c3 = r2 < x2;						\
552    r2 += _c2;							\
553    _c3 |= r2 < _c2;						\
554    r3 = x3 + y3 + _c3;						\
555  } while (0)
556#endif
557
558#ifndef __FP_FRAC_SUB_3
559#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
560  do {								\
561    _FP_W_TYPE _c1, _c2;					\
562    r0 = x0 - y0;						\
563    _c1 = r0 > x0;						\
564    r1 = x1 - y1;						\
565    _c2 = r1 > x1;						\
566    r1 -= _c1;							\
567    _c2 |= _c1 && (y1 == x1);					\
568    r2 = x2 - y2 - _c2;						\
569  } while (0)
570#endif
571
572#ifndef __FP_FRAC_SUB_4
573#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
574  do {								\
575    _FP_W_TYPE _c1, _c2, _c3;					\
576    r0 = x0 - y0;						\
577    _c1 = r0 > x0;						\
578    r1 = x1 - y1;						\
579    _c2 = r1 > x1;						\
580    r1 -= _c1;							\
581    _c2 |= _c1 && (y1 == x1);					\
582    r2 = x2 - y2;						\
583    _c3 = r2 > x2;						\
584    r2 -= _c2;							\
585    _c3 |= _c2 && (y2 == x2);					\
586    r3 = x3 - y3 - _c3;						\
587  } while (0)
588#endif
589
590#ifndef __FP_FRAC_DEC_3
591#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)				\
592  do {									\
593    UWtype _t0, _t1, _t2;						\
594    _t0 = x0, _t1 = x1, _t2 = x2;					\
595    __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);		\
596  } while (0)
597#endif
598
599#ifndef __FP_FRAC_DEC_4
600#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)			\
601  do {									\
602    UWtype _t0, _t1, _t2, _t3;						\
603    _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;				\
604    __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);		\
605  } while (0)
606#endif
607
608#ifndef __FP_FRAC_ADDI_4
609#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
610  do {									\
611    UWtype _t;								\
612    _t = ((x0 += i) < i);						\
613    x1 += _t; _t = (x1 < _t);						\
614    x2 += _t; _t = (x2 < _t);						\
615    x3 += _t;								\
616  } while (0)
617#endif
618
619/* Convert FP values between word sizes. This appears to be more
620 * complicated than I'd have expected it to be, so these might be
621 * wrong... These macros are in any case somewhat bogus because they
622 * use information about what various FRAC_n variables look like
623 * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
624 * the ones in op-2.h and op-1.h.
625 */
626#define _FP_FRAC_COPY_1_4(D, S)		(D##_f = S##_f[0])
627
628#define _FP_FRAC_COPY_2_4(D, S)			\
629do {						\
630  D##_f0 = S##_f[0];				\
631  D##_f1 = S##_f[1];				\
632} while (0)
633
634/* Assembly/disassembly for converting to/from integral types.
635 * No shifting or overflow handled here.
636 */
637/* Put the FP value X into r, which is an integer of size rsize. */
638#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)				\
639  do {									\
640    if (rsize <= _FP_W_TYPE_SIZE)					\
641      r = X##_f[0];							\
642    else if (rsize <= 2*_FP_W_TYPE_SIZE)				\
643    {									\
644      r = X##_f[1];							\
645      r <<= _FP_W_TYPE_SIZE;						\
646      r += X##_f[0];							\
647    }									\
648    else								\
649    {									\
650      /* I'm feeling lazy so we deal with int == 3words (implausible)*/	\
651      /* and int == 4words as a single case.			 */	\
652      r = X##_f[3];							\
653      r <<= _FP_W_TYPE_SIZE;						\
654      r += X##_f[2];							\
655      r <<= _FP_W_TYPE_SIZE;						\
656      r += X##_f[1];							\
657      r <<= _FP_W_TYPE_SIZE;						\
658      r += X##_f[0];							\
659    }									\
660  } while (0)
661
662/* "No disassemble Number Five!" */
663/* move an integer of size rsize into X's fractional part. We rely on
664 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
665 * having to mask the values we store into it.
666 */
667#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)				\
668  do {									\
669    X##_f[0] = r;							\
670    X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
671    X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
672    X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
673  } while (0);
674
675#define _FP_FRAC_COPY_4_1(D, S)			\
676do {						\
677  D##_f[0] = S##_f;				\
678  D##_f[1] = D##_f[2] = D##_f[3] = 0;		\
679} while (0)
680
681#define _FP_FRAC_COPY_4_2(D, S)			\
682do {						\
683  D##_f[0] = S##_f0;				\
684  D##_f[1] = S##_f1;				\
685  D##_f[2] = D##_f[3] = 0;			\
686} while (0)
687
688#define _FP_FRAC_COPY_4_4(D,S)	_FP_FRAC_COPY_4(D,S)
689