1169689Skan/* Software floating-point emulation.
2169689Skan   Basic four-word fraction declaration and manipulation.
3171825Skan   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
4169689Skan   This file is part of the GNU C Library.
5169689Skan   Contributed by Richard Henderson (rth@cygnus.com),
6169689Skan		  Jakub Jelinek (jj@ultra.linux.cz),
7169689Skan		  David S. Miller (davem@redhat.com) and
8169689Skan		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9169689Skan
10169689Skan   The GNU C Library is free software; you can redistribute it and/or
11169689Skan   modify it under the terms of the GNU Lesser General Public
12169689Skan   License as published by the Free Software Foundation; either
13169689Skan   version 2.1 of the License, or (at your option) any later version.
14169689Skan
15169689Skan   In addition to the permissions in the GNU Lesser General Public
16169689Skan   License, the Free Software Foundation gives you unlimited
17169689Skan   permission to link the compiled version of this file into
18169689Skan   combinations with other programs, and to distribute those
19169689Skan   combinations without any restriction coming from the use of this
20169689Skan   file.  (The Lesser General Public License restrictions do apply in
21169689Skan   other respects; for example, they cover modification of the file,
22169689Skan   and distribution when not linked into a combine executable.)
23169689Skan
24169689Skan   The GNU C Library is distributed in the hope that it will be useful,
25169689Skan   but WITHOUT ANY WARRANTY; without even the implied warranty of
26169689Skan   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27169689Skan   Lesser General Public License for more details.
28169689Skan
29169689Skan   You should have received a copy of the GNU Lesser General Public
30169689Skan   License along with the GNU C Library; if not, write to the Free
31169689Skan   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
32169689Skan   MA 02110-1301, USA.  */
33169689Skan
34169689Skan#define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]
35169689Skan#define _FP_FRAC_COPY_4(D,S)			\
36169689Skan  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],	\
37169689Skan   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
38169689Skan#define _FP_FRAC_SET_4(X,I)	__FP_FRAC_SET_4(X, I)
39169689Skan#define _FP_FRAC_HIGH_4(X)	(X##_f[3])
40169689Skan#define _FP_FRAC_LOW_4(X)	(X##_f[0])
41169689Skan#define _FP_FRAC_WORD_4(X,w)	(X##_f[w])
42169689Skan
43169689Skan#define _FP_FRAC_SLL_4(X,N)						\
44169689Skan  do {									\
45169689Skan    _FP_I_TYPE _up, _down, _skip, _i;					\
46169689Skan    _skip = (N) / _FP_W_TYPE_SIZE;					\
47169689Skan    _up = (N) % _FP_W_TYPE_SIZE;					\
48169689Skan    _down = _FP_W_TYPE_SIZE - _up;					\
49169689Skan    if (!_up)								\
50169689Skan      for (_i = 3; _i >= _skip; --_i)					\
51169689Skan	X##_f[_i] = X##_f[_i-_skip];					\
52169689Skan    else								\
53169689Skan      {									\
54169689Skan	for (_i = 3; _i > _skip; --_i)					\
55169689Skan	  X##_f[_i] = X##_f[_i-_skip] << _up				\
56169689Skan		      | X##_f[_i-_skip-1] >> _down;			\
57169689Skan	X##_f[_i--] = X##_f[0] << _up; 					\
58169689Skan      }									\
59169689Skan    for (; _i >= 0; --_i)						\
60169689Skan      X##_f[_i] = 0;							\
61169689Skan  } while (0)
62169689Skan
63169689Skan/* This one was broken too */
64169689Skan#define _FP_FRAC_SRL_4(X,N)						\
65169689Skan  do {									\
66169689Skan    _FP_I_TYPE _up, _down, _skip, _i;					\
67169689Skan    _skip = (N) / _FP_W_TYPE_SIZE;					\
68169689Skan    _down = (N) % _FP_W_TYPE_SIZE;					\
69169689Skan    _up = _FP_W_TYPE_SIZE - _down;					\
70169689Skan    if (!_down)								\
71169689Skan      for (_i = 0; _i <= 3-_skip; ++_i)					\
72169689Skan	X##_f[_i] = X##_f[_i+_skip];					\
73169689Skan    else								\
74169689Skan      {									\
75169689Skan	for (_i = 0; _i < 3-_skip; ++_i)				\
76169689Skan	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
77169689Skan		      | X##_f[_i+_skip+1] << _up;			\
78169689Skan	X##_f[_i++] = X##_f[3] >> _down;				\
79169689Skan      }									\
80169689Skan    for (; _i < 4; ++_i)						\
81169689Skan      X##_f[_i] = 0;							\
82169689Skan  } while (0)
83169689Skan
84169689Skan
85169689Skan/* Right shift with sticky-lsb.
86169689Skan * What this actually means is that we do a standard right-shift,
87169689Skan * but that if any of the bits that fall off the right hand side
88169689Skan * were one then we always set the LSbit.
89169689Skan */
90169689Skan#define _FP_FRAC_SRST_4(X,S,N,size)			\
91169689Skan  do {							\
92169689Skan    _FP_I_TYPE _up, _down, _skip, _i;			\
93169689Skan    _FP_W_TYPE _s;					\
94169689Skan    _skip = (N) / _FP_W_TYPE_SIZE;			\
95169689Skan    _down = (N) % _FP_W_TYPE_SIZE;			\
96169689Skan    _up = _FP_W_TYPE_SIZE - _down;			\
97169689Skan    for (_s = _i = 0; _i < _skip; ++_i)			\
98169689Skan      _s |= X##_f[_i];					\
99169689Skan    if (!_down)						\
100169689Skan      for (_i = 0; _i <= 3-_skip; ++_i)			\
101169689Skan	X##_f[_i] = X##_f[_i+_skip];			\
102169689Skan    else						\
103169689Skan      {							\
104169689Skan	_s |= X##_f[_i] << _up;				\
105169689Skan	for (_i = 0; _i < 3-_skip; ++_i)		\
106169689Skan	  X##_f[_i] = X##_f[_i+_skip] >> _down		\
107169689Skan		      | X##_f[_i+_skip+1] << _up;	\
108169689Skan	X##_f[_i++] = X##_f[3] >> _down;		\
109169689Skan      }							\
110169689Skan    for (; _i < 4; ++_i)				\
111169689Skan      X##_f[_i] = 0;					\
112169689Skan    S = (_s != 0);					\
113169689Skan  } while (0)
114169689Skan
115169689Skan#define _FP_FRAC_SRS_4(X,N,size)		\
116169689Skan  do {						\
117169689Skan    int _sticky;				\
118169689Skan    _FP_FRAC_SRST_4(X, _sticky, N, size);	\
119169689Skan    X##_f[0] |= _sticky;			\
120169689Skan  } while (0)
121169689Skan
122169689Skan#define _FP_FRAC_ADD_4(R,X,Y)						\
123169689Skan  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
124169689Skan		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
125169689Skan		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
126169689Skan
127169689Skan#define _FP_FRAC_SUB_4(R,X,Y)						\
128169689Skan  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
129169689Skan		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
130169689Skan		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
131169689Skan
132169689Skan#define _FP_FRAC_DEC_4(X,Y)						\
133169689Skan  __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
134169689Skan		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
135169689Skan
136169689Skan#define _FP_FRAC_ADDI_4(X,I)						\
137169689Skan  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
138169689Skan
139169689Skan#define _FP_ZEROFRAC_4  0,0,0,0
140169689Skan#define _FP_MINFRAC_4   0,0,0,1
141169689Skan#define _FP_MAXFRAC_4	(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
142169689Skan
143169689Skan#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
144169689Skan#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
145169689Skan#define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
146169689Skan#define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
147169689Skan
148169689Skan#define _FP_FRAC_EQ_4(X,Y)				\
149169689Skan (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]		\
150169689Skan  && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
151169689Skan
152169689Skan#define _FP_FRAC_GT_4(X,Y)				\
153169689Skan (X##_f[3] > Y##_f[3] ||				\
154169689Skan  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
155169689Skan   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
156169689Skan    (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])	\
157169689Skan   ))							\
158169689Skan  ))							\
159169689Skan )
160169689Skan
161169689Skan#define _FP_FRAC_GE_4(X,Y)				\
162169689Skan (X##_f[3] > Y##_f[3] ||				\
163169689Skan  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
164169689Skan   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
165169689Skan    (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])	\
166169689Skan   ))							\
167169689Skan  ))							\
168169689Skan )
169169689Skan
170169689Skan
171169689Skan#define _FP_FRAC_CLZ_4(R,X)		\
172169689Skan  do {					\
173169689Skan    if (X##_f[3])			\
174169689Skan    {					\
175169689Skan	__FP_CLZ(R,X##_f[3]);		\
176169689Skan    }					\
177169689Skan    else if (X##_f[2])			\
178169689Skan    {					\
179169689Skan	__FP_CLZ(R,X##_f[2]);		\
180169689Skan	R += _FP_W_TYPE_SIZE;		\
181169689Skan    }					\
182169689Skan    else if (X##_f[1])			\
183169689Skan    {					\
184169689Skan	__FP_CLZ(R,X##_f[1]);		\
185169689Skan	R += _FP_W_TYPE_SIZE*2;		\
186169689Skan    }					\
187169689Skan    else				\
188169689Skan    {					\
189169689Skan	__FP_CLZ(R,X##_f[0]);		\
190169689Skan	R += _FP_W_TYPE_SIZE*3;		\
191169689Skan    }					\
192169689Skan  } while(0)
193169689Skan
194169689Skan
195169689Skan#define _FP_UNPACK_RAW_4(fs, X, val)				\
196169689Skan  do {								\
197169689Skan    union _FP_UNION_##fs _flo; _flo.flt = (val);		\
198169689Skan    X##_f[0] = _flo.bits.frac0;					\
199169689Skan    X##_f[1] = _flo.bits.frac1;					\
200169689Skan    X##_f[2] = _flo.bits.frac2;					\
201169689Skan    X##_f[3] = _flo.bits.frac3;					\
202169689Skan    X##_e  = _flo.bits.exp;					\
203169689Skan    X##_s  = _flo.bits.sign;					\
204169689Skan  } while (0)
205169689Skan
206169689Skan#define _FP_UNPACK_RAW_4_P(fs, X, val)				\
207169689Skan  do {								\
208169689Skan    union _FP_UNION_##fs *_flo =				\
209169689Skan      (union _FP_UNION_##fs *)(val);				\
210169689Skan								\
211169689Skan    X##_f[0] = _flo->bits.frac0;				\
212169689Skan    X##_f[1] = _flo->bits.frac1;				\
213169689Skan    X##_f[2] = _flo->bits.frac2;				\
214169689Skan    X##_f[3] = _flo->bits.frac3;				\
215169689Skan    X##_e  = _flo->bits.exp;					\
216169689Skan    X##_s  = _flo->bits.sign;					\
217169689Skan  } while (0)
218169689Skan
219169689Skan#define _FP_PACK_RAW_4(fs, val, X)				\
220169689Skan  do {								\
221169689Skan    union _FP_UNION_##fs _flo;					\
222169689Skan    _flo.bits.frac0 = X##_f[0];					\
223169689Skan    _flo.bits.frac1 = X##_f[1];					\
224169689Skan    _flo.bits.frac2 = X##_f[2];					\
225169689Skan    _flo.bits.frac3 = X##_f[3];					\
226169689Skan    _flo.bits.exp   = X##_e;					\
227169689Skan    _flo.bits.sign  = X##_s;					\
228169689Skan    (val) = _flo.flt;				   		\
229169689Skan  } while (0)
230169689Skan
231169689Skan#define _FP_PACK_RAW_4_P(fs, val, X)				\
232169689Skan  do {								\
233169689Skan    union _FP_UNION_##fs *_flo =				\
234169689Skan      (union _FP_UNION_##fs *)(val);				\
235169689Skan								\
236169689Skan    _flo->bits.frac0 = X##_f[0];				\
237169689Skan    _flo->bits.frac1 = X##_f[1];				\
238169689Skan    _flo->bits.frac2 = X##_f[2];				\
239169689Skan    _flo->bits.frac3 = X##_f[3];				\
240169689Skan    _flo->bits.exp   = X##_e;					\
241169689Skan    _flo->bits.sign  = X##_s;					\
242169689Skan  } while (0)
243169689Skan
244169689Skan/*
245169689Skan * Multiplication algorithms:
246169689Skan */
247169689Skan
248169689Skan/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
249169689Skan
250169689Skan#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)			    \
251169689Skan  do {									    \
252169689Skan    _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	    \
253169689Skan    _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);	    \
254169689Skan									    \
255169689Skan    doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
256169689Skan    doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);				    \
257169689Skan    doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);				    \
258169689Skan    doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);				    \
259169689Skan    doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);				    \
260169689Skan    doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);				    \
261169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
262169689Skan		    _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,		    \
263169689Skan		    0,0,_FP_FRAC_WORD_8(_z,1));				    \
264169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
265169689Skan		    _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,		    \
266169689Skan		    _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
267169689Skan		    _FP_FRAC_WORD_8(_z,1));				    \
268169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
269169689Skan		    _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,		    \
270169689Skan		    0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));	    \
271169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
272169689Skan		    _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,		    \
273169689Skan		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
274169689Skan		    _FP_FRAC_WORD_8(_z,2));				    \
275169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
276169689Skan		    _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,		    \
277169689Skan		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
278169689Skan		    _FP_FRAC_WORD_8(_z,2));				    \
279169689Skan    doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);				    \
280169689Skan    doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);				    \
281169689Skan    doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);				    \
282169689Skan    doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);				    \
283169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
284169689Skan		    _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,		    \
285169689Skan		    0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));	    \
286169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
287169689Skan		    _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,		    \
288169689Skan		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
289169689Skan		    _FP_FRAC_WORD_8(_z,3));				    \
290169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
291169689Skan		    _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,		    \
292169689Skan		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
293169689Skan		    _FP_FRAC_WORD_8(_z,3));				    \
294169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
295169689Skan		    _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,		    \
296169689Skan		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
297169689Skan		    _FP_FRAC_WORD_8(_z,3));				    \
298169689Skan    doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);				    \
299169689Skan    doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);				    \
300169689Skan    doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);				    \
301169689Skan    doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);				    \
302169689Skan    doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);				    \
303169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
304169689Skan		    _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,		    \
305169689Skan		    0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));	    \
306169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
307169689Skan		    _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,		    \
308169689Skan		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
309169689Skan		    _FP_FRAC_WORD_8(_z,4));				    \
310169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
311169689Skan		    _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,		    \
312169689Skan		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
313169689Skan		    _FP_FRAC_WORD_8(_z,4));				    \
314169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
315169689Skan		    _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,		    \
316169689Skan		    0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));	    \
317169689Skan    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
318169689Skan		    _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,		    \
319169689Skan		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
320169689Skan		    _FP_FRAC_WORD_8(_z,5));				    \
321169689Skan    doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);				    \
322169689Skan    __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
323169689Skan		    _b_f1,_b_f0,					    \
324169689Skan		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));	    \
325169689Skan									    \
326169689Skan    /* Normalize since we know where the msb of the multiplicands	    \
327169689Skan       were (bit B), we know that the msb of the of the product is	    \
328169689Skan       at either 2B or 2B-1.  */					    \
329169689Skan    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);			    \
330169689Skan    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
331169689Skan		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
332169689Skan  } while (0)
333169689Skan
334169689Skan#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)				    \
335169689Skan  do {									    \
336169689Skan    _FP_FRAC_DECL_8(_z);						    \
337169689Skan									    \
338169689Skan    mpn_mul_n(_z_f, _x_f, _y_f, 4);					    \
339169689Skan									    \
340169689Skan    /* Normalize since we know where the msb of the multiplicands	    \
341169689Skan       were (bit B), we know that the msb of the of the product is	    \
342169689Skan       at either 2B or 2B-1.  */					    \
343169689Skan    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);	 		    \
344169689Skan    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
345169689Skan		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
346169689Skan  } while (0)
347169689Skan
348169689Skan/*
349169689Skan * Helper utility for _FP_DIV_MEAT_4_udiv:
350169689Skan * pppp = m * nnn
351169689Skan */
352169689Skan#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)				    \
353169689Skan  do {									    \
354169689Skan    UWtype _t;								    \
355169689Skan    umul_ppmm(p1,p0,m,n0);						    \
356169689Skan    umul_ppmm(p2,_t,m,n1);						    \
357169689Skan    __FP_FRAC_ADDI_2(p2,p1,_t);						    \
358169689Skan    umul_ppmm(p3,_t,m,n2);						    \
359169689Skan    __FP_FRAC_ADDI_2(p3,p2,_t);						    \
360169689Skan  } while (0)
361169689Skan
362169689Skan/*
363169689Skan * Division algorithms:
364169689Skan */
365169689Skan
366169689Skan#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)				    \
367169689Skan  do {									    \
368169689Skan    int _i;								    \
369169689Skan    _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);				    \
370169689Skan    _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);					    \
371169689Skan    if (_FP_FRAC_GT_4(X, Y))						    \
372169689Skan      {									    \
373169689Skan	_n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);			    \
374169689Skan	_FP_FRAC_SRL_4(X, 1);						    \
375169689Skan      }									    \
376169689Skan    else								    \
377169689Skan      R##_e--;								    \
378169689Skan									    \
379169689Skan    /* Normalize, i.e. make the most significant bit of the 		    \
380169689Skan       denominator set. */						    \
381169689Skan    _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);				    \
382169689Skan									    \
383169689Skan    for (_i = 3; ; _i--)						    \
384169689Skan      {									    \
385169689Skan        if (X##_f[3] == Y##_f[3])					    \
386169689Skan          {								    \
387169689Skan            /* This is a special case, not an optimization		    \
388169689Skan               (X##_f[3]/Y##_f[3] would not fit into UWtype).		    \
389169689Skan               As X## is guaranteed to be < Y,  R##_f[_i] can be either	    \
390169689Skan               (UWtype)-1 or (UWtype)-2.  */				    \
391169689Skan            R##_f[_i] = -1;						    \
392169689Skan            if (!_i)							    \
393169689Skan	      break;							    \
394169689Skan            __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],	    \
395169689Skan			    Y##_f[2], Y##_f[1], Y##_f[0], 0,		    \
396169689Skan			    X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);	    \
397169689Skan            _FP_FRAC_SUB_4(X, Y, X);					    \
398169689Skan            if (X##_f[3] > Y##_f[3])					    \
399169689Skan              {								    \
400169689Skan                R##_f[_i] = -2;						    \
401169689Skan                _FP_FRAC_ADD_4(X, Y, X);				    \
402169689Skan              }								    \
403169689Skan          }								    \
404169689Skan        else								    \
405169689Skan          {								    \
406169689Skan            udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
407169689Skan            umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],		    \
408169689Skan			  R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);	    \
409169689Skan            X##_f[2] = X##_f[1];					    \
410169689Skan            X##_f[1] = X##_f[0];					    \
411169689Skan            X##_f[0] = _n_f[_i];					    \
412169689Skan            if (_FP_FRAC_GT_4(_m, X))					    \
413169689Skan              {								    \
414169689Skan                R##_f[_i]--;						    \
415169689Skan                _FP_FRAC_ADD_4(X, Y, X);				    \
416169689Skan                if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))	    \
417169689Skan                  {							    \
418169689Skan		    R##_f[_i]--;					    \
419169689Skan		    _FP_FRAC_ADD_4(X, Y, X);				    \
420169689Skan                  }							    \
421169689Skan              }								    \
422169689Skan            _FP_FRAC_DEC_4(X, _m);					    \
423169689Skan            if (!_i)							    \
424169689Skan	      {								    \
425169689Skan		if (!_FP_FRAC_EQ_4(X, _m))				    \
426169689Skan		  R##_f[0] |= _FP_WORK_STICKY;				    \
427169689Skan		break;							    \
428169689Skan	      }								    \
429169689Skan          }								    \
430169689Skan      }									    \
431169689Skan  } while (0)
432169689Skan
433169689Skan
434169689Skan/*
435169689Skan * Square root algorithms:
436169689Skan * We have just one right now, maybe Newton approximation
437169689Skan * should be added for those machines where division is fast.
438169689Skan */
439169689Skan
440169689Skan#define _FP_SQRT_MEAT_4(R, S, T, X, q)				\
441169689Skan  do {								\
442169689Skan    while (q)							\
443169689Skan      {								\
444169689Skan	T##_f[3] = S##_f[3] + q;				\
445169689Skan	if (T##_f[3] <= X##_f[3])				\
446169689Skan	  {							\
447169689Skan	    S##_f[3] = T##_f[3] + q;				\
448169689Skan	    X##_f[3] -= T##_f[3];				\
449169689Skan	    R##_f[3] += q;					\
450169689Skan	  }							\
451169689Skan	_FP_FRAC_SLL_4(X, 1);					\
452169689Skan	q >>= 1;						\
453169689Skan      }								\
454169689Skan    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
455169689Skan    while (q)							\
456169689Skan      {								\
457169689Skan	T##_f[2] = S##_f[2] + q;				\
458169689Skan	T##_f[3] = S##_f[3];					\
459169689Skan	if (T##_f[3] < X##_f[3] || 				\
460169689Skan	    (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))	\
461169689Skan	  {							\
462169689Skan	    S##_f[2] = T##_f[2] + q;				\
463169689Skan	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
464169689Skan	    __FP_FRAC_DEC_2(X##_f[3], X##_f[2],			\
465169689Skan			    T##_f[3], T##_f[2]);		\
466169689Skan	    R##_f[2] += q;					\
467169689Skan	  }							\
468169689Skan	_FP_FRAC_SLL_4(X, 1);					\
469169689Skan	q >>= 1;						\
470169689Skan      }								\
471169689Skan    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
472169689Skan    while (q)							\
473169689Skan      {								\
474169689Skan	T##_f[1] = S##_f[1] + q;				\
475169689Skan	T##_f[2] = S##_f[2];					\
476169689Skan	T##_f[3] = S##_f[3];					\
477169689Skan	if (T##_f[3] < X##_f[3] || 				\
478169689Skan	    (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||	\
479169689Skan	     (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))	\
480169689Skan	  {							\
481169689Skan	    S##_f[1] = T##_f[1] + q;				\
482169689Skan	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
483169689Skan	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
484169689Skan	    __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],	\
485169689Skan	    		    T##_f[3], T##_f[2], T##_f[1]);	\
486169689Skan	    R##_f[1] += q;					\
487169689Skan	  }							\
488169689Skan	_FP_FRAC_SLL_4(X, 1);					\
489169689Skan	q >>= 1;						\
490169689Skan      }								\
491169689Skan    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
492169689Skan    while (q != _FP_WORK_ROUND)					\
493169689Skan      {								\
494169689Skan	T##_f[0] = S##_f[0] + q;				\
495169689Skan	T##_f[1] = S##_f[1];					\
496169689Skan	T##_f[2] = S##_f[2];					\
497169689Skan	T##_f[3] = S##_f[3];					\
498169689Skan	if (_FP_FRAC_GE_4(X,T))					\
499169689Skan	  {							\
500169689Skan	    S##_f[0] = T##_f[0] + q;				\
501169689Skan	    S##_f[1] += (T##_f[0] > S##_f[0]);			\
502169689Skan	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
503169689Skan	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
504169689Skan	    _FP_FRAC_DEC_4(X, T);				\
505169689Skan	    R##_f[0] += q;					\
506169689Skan	  }							\
507169689Skan	_FP_FRAC_SLL_4(X, 1);					\
508169689Skan	q >>= 1;						\
509169689Skan      }								\
510169689Skan    if (!_FP_FRAC_ZEROP_4(X))					\
511169689Skan      {								\
512169689Skan	if (_FP_FRAC_GT_4(X,S))					\
513169689Skan	  R##_f[0] |= _FP_WORK_ROUND;				\
514169689Skan	R##_f[0] |= _FP_WORK_STICKY;				\
515169689Skan      }								\
516169689Skan  } while (0)
517169689Skan
518169689Skan
519169689Skan/*
520169689Skan * Internals
521169689Skan */
522169689Skan
523169689Skan#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)					\
524169689Skan  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
525169689Skan
526169689Skan#ifndef __FP_FRAC_ADD_3
527169689Skan#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
528169689Skan  do {								\
529169689Skan    _FP_W_TYPE _c1, _c2;					\
530169689Skan    r0 = x0 + y0;						\
531169689Skan    _c1 = r0 < x0;						\
532169689Skan    r1 = x1 + y1;						\
533169689Skan    _c2 = r1 < x1;						\
534169689Skan    r1 += _c1;							\
535169689Skan    _c2 |= r1 < _c1;						\
536169689Skan    r2 = x2 + y2 + _c2;						\
537169689Skan  } while (0)
538169689Skan#endif
539169689Skan
540169689Skan#ifndef __FP_FRAC_ADD_4
541169689Skan#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
542169689Skan  do {								\
543169689Skan    _FP_W_TYPE _c1, _c2, _c3;					\
544169689Skan    r0 = x0 + y0;						\
545169689Skan    _c1 = r0 < x0;						\
546169689Skan    r1 = x1 + y1;						\
547169689Skan    _c2 = r1 < x1;						\
548169689Skan    r1 += _c1;							\
549169689Skan    _c2 |= r1 < _c1;						\
550169689Skan    r2 = x2 + y2;						\
551169689Skan    _c3 = r2 < x2;						\
552169689Skan    r2 += _c2;							\
553169689Skan    _c3 |= r2 < _c2;						\
554169689Skan    r3 = x3 + y3 + _c3;						\
555169689Skan  } while (0)
556169689Skan#endif
557169689Skan
558169689Skan#ifndef __FP_FRAC_SUB_3
559169689Skan#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
560169689Skan  do {								\
561169689Skan    _FP_W_TYPE _c1, _c2;					\
562169689Skan    r0 = x0 - y0;						\
563169689Skan    _c1 = r0 > x0;						\
564169689Skan    r1 = x1 - y1;						\
565169689Skan    _c2 = r1 > x1;						\
566169689Skan    r1 -= _c1;							\
567169689Skan    _c2 |= _c1 && (y1 == x1);					\
568169689Skan    r2 = x2 - y2 - _c2;						\
569169689Skan  } while (0)
570169689Skan#endif
571169689Skan
572169689Skan#ifndef __FP_FRAC_SUB_4
573169689Skan#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
574169689Skan  do {								\
575169689Skan    _FP_W_TYPE _c1, _c2, _c3;					\
576169689Skan    r0 = x0 - y0;						\
577169689Skan    _c1 = r0 > x0;						\
578169689Skan    r1 = x1 - y1;						\
579169689Skan    _c2 = r1 > x1;						\
580169689Skan    r1 -= _c1;							\
581169689Skan    _c2 |= _c1 && (y1 == x1);					\
582169689Skan    r2 = x2 - y2;						\
583169689Skan    _c3 = r2 > x2;						\
584169689Skan    r2 -= _c2;							\
585169689Skan    _c3 |= _c2 && (y2 == x2);					\
586169689Skan    r3 = x3 - y3 - _c3;						\
587169689Skan  } while (0)
588169689Skan#endif
589169689Skan
590169689Skan#ifndef __FP_FRAC_DEC_3
591169689Skan#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)				\
592169689Skan  do {									\
593169689Skan    UWtype _t0, _t1, _t2;						\
594169689Skan    _t0 = x0, _t1 = x1, _t2 = x2;					\
595169689Skan    __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);		\
596169689Skan  } while (0)
597169689Skan#endif
598169689Skan
599169689Skan#ifndef __FP_FRAC_DEC_4
600169689Skan#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)			\
601169689Skan  do {									\
602169689Skan    UWtype _t0, _t1, _t2, _t3;						\
603169689Skan    _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;				\
604169689Skan    __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);		\
605169689Skan  } while (0)
606169689Skan#endif
607169689Skan
608169689Skan#ifndef __FP_FRAC_ADDI_4
609169689Skan#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
610169689Skan  do {									\
611169689Skan    UWtype _t;								\
612169689Skan    _t = ((x0 += i) < i);						\
613169689Skan    x1 += _t; _t = (x1 < _t);						\
614169689Skan    x2 += _t; _t = (x2 < _t);						\
615169689Skan    x3 += _t;								\
616169689Skan  } while (0)
617169689Skan#endif
618169689Skan
619169689Skan/* Convert FP values between word sizes. This appears to be more
620169689Skan * complicated than I'd have expected it to be, so these might be
621169689Skan * wrong... These macros are in any case somewhat bogus because they
622169689Skan * use information about what various FRAC_n variables look like
623169689Skan * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
624169689Skan * the ones in op-2.h and op-1.h.
625169689Skan */
626169689Skan#define _FP_FRAC_COPY_1_4(D, S)		(D##_f = S##_f[0])
627169689Skan
628169689Skan#define _FP_FRAC_COPY_2_4(D, S)			\
629169689Skando {						\
630169689Skan  D##_f0 = S##_f[0];				\
631169689Skan  D##_f1 = S##_f[1];				\
632169689Skan} while (0)
633169689Skan
634169689Skan/* Assembly/disassembly for converting to/from integral types.
635169689Skan * No shifting or overflow handled here.
636169689Skan */
637169689Skan/* Put the FP value X into r, which is an integer of size rsize. */
638169689Skan#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)				\
639169689Skan  do {									\
640169689Skan    if (rsize <= _FP_W_TYPE_SIZE)					\
641169689Skan      r = X##_f[0];							\
642169689Skan    else if (rsize <= 2*_FP_W_TYPE_SIZE)				\
643169689Skan    {									\
644169689Skan      r = X##_f[1];							\
645169689Skan      r <<= _FP_W_TYPE_SIZE;						\
646169689Skan      r += X##_f[0];							\
647169689Skan    }									\
648169689Skan    else								\
649169689Skan    {									\
650169689Skan      /* I'm feeling lazy so we deal with int == 3words (implausible)*/	\
651169689Skan      /* and int == 4words as a single case.			 */	\
652169689Skan      r = X##_f[3];							\
653169689Skan      r <<= _FP_W_TYPE_SIZE;						\
654169689Skan      r += X##_f[2];							\
655169689Skan      r <<= _FP_W_TYPE_SIZE;						\
656169689Skan      r += X##_f[1];							\
657169689Skan      r <<= _FP_W_TYPE_SIZE;						\
658169689Skan      r += X##_f[0];							\
659169689Skan    }									\
660169689Skan  } while (0)
661169689Skan
662169689Skan/* "No disassemble Number Five!" */
663169689Skan/* move an integer of size rsize into X's fractional part. We rely on
664169689Skan * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
665169689Skan * having to mask the values we store into it.
666169689Skan */
667169689Skan#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)				\
668169689Skan  do {									\
669169689Skan    X##_f[0] = r;							\
670169689Skan    X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
671169689Skan    X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
672169689Skan    X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
673169689Skan  } while (0);
674169689Skan
675169689Skan#define _FP_FRAC_COPY_4_1(D, S)			\
676169689Skando {						\
677169689Skan  D##_f[0] = S##_f;				\
678169689Skan  D##_f[1] = D##_f[2] = D##_f[3] = 0;		\
679169689Skan} while (0)
680169689Skan
681169689Skan#define _FP_FRAC_COPY_4_2(D, S)			\
682169689Skando {						\
683169689Skan  D##_f[0] = S##_f0;				\
684169689Skan  D##_f[1] = S##_f1;				\
685169689Skan  D##_f[2] = D##_f[3] = 0;			\
686169689Skan} while (0)
687171825Skan
688171825Skan#define _FP_FRAC_COPY_4_4(D,S)	_FP_FRAC_COPY_4(D,S)
689