• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/include/math-emu/
1/* Software floating-point emulation.
2   Basic two-word fraction declaration and manipulation.
3   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5   Contributed by Richard Henderson (rth@cygnus.com),
6		  Jakub Jelinek (jj@ultra.linux.cz),
7		  David S. Miller (davem@redhat.com) and
8		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
10   The GNU C Library is free software; you can redistribute it and/or
11   modify it under the terms of the GNU Library General Public License as
12   published by the Free Software Foundation; either version 2 of the
13   License, or (at your option) any later version.
14
15   The GNU C Library is distributed in the hope that it will be useful,
16   but WITHOUT ANY WARRANTY; without even the implied warranty of
17   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18   Library General Public License for more details.
19
20   You should have received a copy of the GNU Library General Public
21   License along with the GNU C Library; see the file COPYING.LIB.  If
22   not, write to the Free Software Foundation, Inc.,
23   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24
25#ifndef __MATH_EMU_OP_2_H__
26#define __MATH_EMU_OP_2_H__
27
28#define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0 = 0, X##_f1 = 0
29#define _FP_FRAC_COPY_2(D,S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
30#define _FP_FRAC_SET_2(X,I)	__FP_FRAC_SET_2(X, I)
31#define _FP_FRAC_HIGH_2(X)	(X##_f1)
32#define _FP_FRAC_LOW_2(X)	(X##_f0)
33#define _FP_FRAC_WORD_2(X,w)	(X##_f##w)
34
35#define _FP_FRAC_SLL_2(X,N)						\
36  do {									\
37    if ((N) < _FP_W_TYPE_SIZE)						\
38      {									\
39	if (__builtin_constant_p(N) && (N) == 1) 			\
40	  {								\
41	    X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);	\
42	    X##_f0 += X##_f0;						\
43	  }								\
44	else								\
45	  {								\
46	    X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N));	\
47	    X##_f0 <<= (N);						\
48	  }								\
49      }									\
50    else								\
51      {									\
52	X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);			\
53	X##_f0 = 0;							\
54      }									\
55  } while (0)
56
57#define _FP_FRAC_SRL_2(X,N)						\
58  do {									\
59    if ((N) < _FP_W_TYPE_SIZE)						\
60      {									\
61	X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));	\
62	X##_f1 >>= (N);							\
63      }									\
64    else								\
65      {									\
66	X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);			\
67	X##_f1 = 0;							\
68      }									\
69  } while (0)
70
71/* Right shift with sticky-lsb.  */
72#define _FP_FRAC_SRS_2(X,N,sz)						\
73  do {									\
74    if ((N) < _FP_W_TYPE_SIZE)						\
75      {									\
76	X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |	\
77		  (__builtin_constant_p(N) && (N) == 1			\
78		   ? X##_f0 & 1						\
79		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));	\
80	X##_f1 >>= (N);							\
81      }									\
82    else								\
83      {									\
84	X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |			\
85		(((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \
86	X##_f1 = 0;							\
87      }									\
88  } while (0)
89
90#define _FP_FRAC_ADDI_2(X,I)	\
91  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
92
93#define _FP_FRAC_ADD_2(R,X,Y)	\
94  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
95
96#define _FP_FRAC_SUB_2(R,X,Y)	\
97  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
98
99#define _FP_FRAC_DEC_2(X,Y)	\
100  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
101
102#define _FP_FRAC_CLZ_2(R,X)	\
103  do {				\
104    if (X##_f1)			\
105      __FP_CLZ(R,X##_f1);	\
106    else 			\
107    {				\
108      __FP_CLZ(R,X##_f0);	\
109      R += _FP_W_TYPE_SIZE;	\
110    }				\
111  } while(0)
112
113/* Predicates */
114#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE)X##_f1 < 0)
115#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
116#define _FP_FRAC_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
117#define _FP_FRAC_CLEAR_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
118#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
119#define _FP_FRAC_GT_2(X, Y)	\
120  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
121#define _FP_FRAC_GE_2(X, Y)	\
122  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
123
124#define _FP_ZEROFRAC_2		0, 0
125#define _FP_MINFRAC_2		0, 1
126#define _FP_MAXFRAC_2		(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
127
128/*
129 * Internals
130 */
131
132#define __FP_FRAC_SET_2(X,I1,I0)	(X##_f0 = I0, X##_f1 = I1)
133
134#define __FP_CLZ_2(R, xh, xl)	\
135  do {				\
136    if (xh)			\
137      __FP_CLZ(R,xh);		\
138    else 			\
139    {				\
140      __FP_CLZ(R,xl);		\
141      R += _FP_W_TYPE_SIZE;	\
142    }				\
143  } while(0)
144
145
146#undef __FP_FRAC_ADDI_2
147#define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa(xh, xl, xh, xl, 0, i)
148#undef __FP_FRAC_ADD_2
149#define __FP_FRAC_ADD_2			add_ssaaaa
150#undef __FP_FRAC_SUB_2
151#define __FP_FRAC_SUB_2			sub_ddmmss
152#undef __FP_FRAC_DEC_2
153#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	sub_ddmmss(xh, xl, xh, xl, yh, yl)
154
155/*
156 * Unpack the raw bits of a native fp value.  Do not classify or
157 * normalize the data.
158 */
159
160#define _FP_UNPACK_RAW_2(fs, X, val)			\
161  do {							\
162    union _FP_UNION_##fs _flo; _flo.flt = (val);	\
163							\
164    X##_f0 = _flo.bits.frac0;				\
165    X##_f1 = _flo.bits.frac1;				\
166    X##_e  = _flo.bits.exp;				\
167    X##_s  = _flo.bits.sign;				\
168  } while (0)
169
170#define _FP_UNPACK_RAW_2_P(fs, X, val)			\
171  do {							\
172    union _FP_UNION_##fs *_flo =			\
173      (union _FP_UNION_##fs *)(val);			\
174							\
175    X##_f0 = _flo->bits.frac0;				\
176    X##_f1 = _flo->bits.frac1;				\
177    X##_e  = _flo->bits.exp;				\
178    X##_s  = _flo->bits.sign;				\
179  } while (0)
180
181
182/*
183 * Repack the raw bits of a native fp value.
184 */
185
186#define _FP_PACK_RAW_2(fs, val, X)			\
187  do {							\
188    union _FP_UNION_##fs _flo;				\
189							\
190    _flo.bits.frac0 = X##_f0;				\
191    _flo.bits.frac1 = X##_f1;				\
192    _flo.bits.exp   = X##_e;				\
193    _flo.bits.sign  = X##_s;				\
194							\
195    (val) = _flo.flt;					\
196  } while (0)
197
198#define _FP_PACK_RAW_2_P(fs, val, X)			\
199  do {							\
200    union _FP_UNION_##fs *_flo =			\
201      (union _FP_UNION_##fs *)(val);			\
202							\
203    _flo->bits.frac0 = X##_f0;				\
204    _flo->bits.frac1 = X##_f1;				\
205    _flo->bits.exp   = X##_e;				\
206    _flo->bits.sign  = X##_s;				\
207  } while (0)
208
209
210/*
211 * Multiplication algorithms:
212 */
213
214/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
215
216#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
217  do {									\
218    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
219									\
220    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);	\
221    doit(_b_f1, _b_f0, X##_f0, Y##_f1);					\
222    doit(_c_f1, _c_f0, X##_f1, Y##_f0);					\
223    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1);	\
224									\
225    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
226		    _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,		\
227		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
228		    _FP_FRAC_WORD_4(_z,1));				\
229    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
230		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,		\
231		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
232		    _FP_FRAC_WORD_4(_z,1));				\
233									\
234    /* Normalize since we know where the msb of the multiplicands	\
235       were (bit B), we know that the msb of the of the product is	\
236       at either 2B or 2B-1.  */					\
237    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
238    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
239    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
240  } while (0)
241
242/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
243   Do only 3 multiplications instead of four. This one is for machines
244   where multiplication is much more expensive than subtraction.  */
245
246#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
247  do {									\
248    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
249    _FP_W_TYPE _d;							\
250    int _c1, _c2;							\
251									\
252    _b_f0 = X##_f0 + X##_f1;						\
253    _c1 = _b_f0 < X##_f0;						\
254    _b_f1 = Y##_f0 + Y##_f1;						\
255    _c2 = _b_f1 < Y##_f0;						\
256    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);			\
257    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);	\
258    doit(_c_f1, _c_f0, X##_f1, Y##_f1);					\
259									\
260    _b_f0 &= -_c2;							\
261    _b_f1 &= -_c1;							\
262    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
263		    _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,		\
264		    0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));	\
265    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
266		     _b_f0);						\
267    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
268		     _b_f1);						\
269    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
270		    _FP_FRAC_WORD_4(_z,1),				\
271		    0, _d, _FP_FRAC_WORD_4(_z,0));			\
272    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
273		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);		\
274    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),	\
275		    _c_f1, _c_f0,					\
276		    _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));	\
277									\
278    /* Normalize since we know where the msb of the multiplicands	\
279       were (bit B), we know that the msb of the of the product is	\
280       at either 2B or 2B-1.  */					\
281    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
282    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
283    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
284  } while (0)
285
286#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
287  do {									\
288    _FP_FRAC_DECL_4(_z);						\
289    _FP_W_TYPE _x[2], _y[2];						\
290    _x[0] = X##_f0; _x[1] = X##_f1;					\
291    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
292									\
293    mpn_mul_n(_z_f, _x, _y, 2);						\
294									\
295    /* Normalize since we know where the msb of the multiplicands	\
296       were (bit B), we know that the msb of the of the product is	\
297       at either 2B or 2B-1.  */					\
298    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
299    R##_f0 = _z_f[0];							\
300    R##_f1 = _z_f[1];							\
301  } while (0)
302
303/* Do at most 120x120=240 bits multiplication using double floating
304   point multiplication.  This is useful if floating point
305   multiplication has much bigger throughput than integer multiply.
306   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
307   between 106 and 120 only.
308   Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
309   SETFETZ is a macro which will disable all FPU exceptions and set rounding
310   towards zero,  RESETFE should optionally reset it back.  */
311
312#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)	\
313  do {										\
314    static const double _const[] = {						\
315      /* 2^-24 */ 5.9604644775390625e-08,					\
316      /* 2^-48 */ 3.5527136788005009e-15,					\
317      /* 2^-72 */ 2.1175823681357508e-22,					\
318      /* 2^-96 */ 1.2621774483536189e-29,					\
319      /* 2^28 */ 2.68435456e+08,						\
320      /* 2^4 */ 1.600000e+01,							\
321      /* 2^-20 */ 9.5367431640625e-07,						\
322      /* 2^-44 */ 5.6843418860808015e-14,					\
323      /* 2^-68 */ 3.3881317890172014e-21,					\
324      /* 2^-92 */ 2.0194839173657902e-28,					\
325      /* 2^-116 */ 1.2037062152420224e-35};					\
326    double _a240, _b240, _c240, _d240, _e240, _f240, 				\
327	   _g240, _h240, _i240, _j240, _k240;					\
328    union { double d; UDItype i; } _l240, _m240, _n240, _o240,			\
329				   _p240, _q240, _r240, _s240;			\
330    UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;			\
331										\
332    if (wfracbits < 106 || wfracbits > 120)					\
333      abort();									\
334										\
335    setfetz;									\
336										\
337    _e240 = (double)(long)(X##_f0 & 0xffffff);					\
338    _j240 = (double)(long)(Y##_f0 & 0xffffff);					\
339    _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);				\
340    _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);				\
341    _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));	\
342    _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));	\
343    _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);				\
344    _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);				\
345    _a240 = (double)(long)(X##_f1 >> 32);					\
346    _f240 = (double)(long)(Y##_f1 >> 32);					\
347    _e240 *= _const[3];								\
348    _j240 *= _const[3];								\
349    _d240 *= _const[2];								\
350    _i240 *= _const[2];								\
351    _c240 *= _const[1];								\
352    _h240 *= _const[1];								\
353    _b240 *= _const[0];								\
354    _g240 *= _const[0];								\
355    _s240.d =							      _e240*_j240;\
356    _r240.d =						_d240*_j240 + _e240*_i240;\
357    _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240;\
358    _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
359    _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
360    _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;		\
361    _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;				\
362    _l240.d = _a240*_g240 + _b240*_f240;					\
363    _k240 =   _a240*_f240;							\
364    _r240.d += _s240.d;								\
365    _q240.d += _r240.d;								\
366    _p240.d += _q240.d;								\
367    _o240.d += _p240.d;								\
368    _n240.d += _o240.d;								\
369    _m240.d += _n240.d;								\
370    _l240.d += _m240.d;								\
371    _k240 += _l240.d;								\
372    _s240.d -= ((_const[10]+_s240.d)-_const[10]);				\
373    _r240.d -= ((_const[9]+_r240.d)-_const[9]);					\
374    _q240.d -= ((_const[8]+_q240.d)-_const[8]);					\
375    _p240.d -= ((_const[7]+_p240.d)-_const[7]);					\
376    _o240.d += _const[7];							\
377    _n240.d += _const[6];							\
378    _m240.d += _const[5];							\
379    _l240.d += _const[4];							\
380    if (_s240.d != 0.0) _y240 = 1;						\
381    if (_r240.d != 0.0) _y240 = 1;						\
382    if (_q240.d != 0.0) _y240 = 1;						\
383    if (_p240.d != 0.0) _y240 = 1;						\
384    _t240 = (DItype)_k240;							\
385    _u240 = _l240.i;								\
386    _v240 = _m240.i;								\
387    _w240 = _n240.i;								\
388    _x240 = _o240.i;								\
389    R##_f1 = (_t240 << (128 - (wfracbits - 1)))					\
390	     | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));			\
391    R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))			\
392    	     | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))			\
393    	     | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))			\
394    	     | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))			\
395    	     | _y240;								\
396    resetfe;									\
397  } while (0)
398
399/*
400 * Division algorithms:
401 */
402
403#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
404  do {									\
405    _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;		\
406    if (_FP_FRAC_GT_2(X, Y))						\
407      {									\
408	_n_f2 = X##_f1 >> 1;						\
409	_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
410	_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
411      }									\
412    else								\
413      {									\
414	R##_e--;							\
415	_n_f2 = X##_f1;							\
416	_n_f1 = X##_f0;							\
417	_n_f0 = 0;							\
418      }									\
419									\
420    /* Normalize, i.e. make the most significant bit of the 		\
421       denominator set. */						\
422    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);				\
423									\
424    udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);			\
425    umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);				\
426    _r_f0 = _n_f0;							\
427    if (_FP_FRAC_GT_2(_m, _r))						\
428      {									\
429	R##_f1--;							\
430	_FP_FRAC_ADD_2(_r, Y, _r);					\
431	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
432	  {								\
433	    R##_f1--;							\
434	    _FP_FRAC_ADD_2(_r, Y, _r);					\
435	  }								\
436      }									\
437    _FP_FRAC_DEC_2(_r, _m);						\
438									\
439    if (_r_f1 == Y##_f1)						\
440      {									\
441	/* This is a special case, not an optimization			\
442	   (_r/Y##_f1 would not fit into UWtype).			\
443	   As _r is guaranteed to be < Y,  R##_f0 can be either		\
444	   (UWtype)-1 or (UWtype)-2.  But as we know what kind		\
445	   of bits it is (sticky, guard, round),  we don't care.	\
446	   We also don't care what the reminder is,  because the	\
447	   guard bit will be set anyway.  -jj */			\
448	R##_f0 = -1;							\
449      }									\
450    else								\
451      {									\
452	udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);		\
453	umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);			\
454	_r_f0 = 0;							\
455	if (_FP_FRAC_GT_2(_m, _r))					\
456	  {								\
457	    R##_f0--;							\
458	    _FP_FRAC_ADD_2(_r, Y, _r);					\
459	    if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
460	      {								\
461		R##_f0--;						\
462		_FP_FRAC_ADD_2(_r, Y, _r);				\
463	      }								\
464	  }								\
465	if (!_FP_FRAC_EQ_2(_r, _m))					\
466	  R##_f0 |= _FP_WORK_STICKY;					\
467      }									\
468  } while (0)
469
470
471#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)					\
472  do {									\
473    _FP_W_TYPE _x[4], _y[2], _z[4];					\
474    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
475    _x[0] = _x[3] = 0;							\
476    if (_FP_FRAC_GT_2(X, Y))						\
477      {									\
478	R##_e++;							\
479	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |	\
480		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
481			    (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE)));	\
482	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);	\
483      }									\
484    else								\
485      {									\
486	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |	\
487		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
488			    (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));	\
489	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);	\
490      }									\
491									\
492    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);				\
493    R##_f1 = _z[1];							\
494    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);				\
495  } while (0)
496
497
498/*
499 * Square root algorithms:
500 * We have just one right now, maybe Newton approximation
501 * should be added for those machines where division is fast.
502 */
503
504#define _FP_SQRT_MEAT_2(R, S, T, X, q)			\
505  do {							\
506    while (q)						\
507      {							\
508	T##_f1 = S##_f1 + q;				\
509	if (T##_f1 <= X##_f1)				\
510	  {						\
511	    S##_f1 = T##_f1 + q;			\
512	    X##_f1 -= T##_f1;				\
513	    R##_f1 += q;				\
514	  }						\
515	_FP_FRAC_SLL_2(X, 1);				\
516	q >>= 1;					\
517      }							\
518    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
519    while (q != _FP_WORK_ROUND)				\
520      {							\
521	T##_f0 = S##_f0 + q;				\
522	T##_f1 = S##_f1;				\
523	if (T##_f1 < X##_f1 || 				\
524	    (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
525	  {						\
526	    S##_f0 = T##_f0 + q;			\
527	    S##_f1 += (T##_f0 > S##_f0);		\
528	    _FP_FRAC_DEC_2(X, T);			\
529	    R##_f0 += q;				\
530	  }						\
531	_FP_FRAC_SLL_2(X, 1);				\
532	q >>= 1;					\
533      }							\
534    if (X##_f0 | X##_f1)				\
535      {							\
536	if (S##_f1 < X##_f1 || 				\
537	    (S##_f1 == X##_f1 && S##_f0 < X##_f0))	\
538	  R##_f0 |= _FP_WORK_ROUND;			\
539	R##_f0 |= _FP_WORK_STICKY;			\
540      }							\
541  } while (0)
542
543
544/*
545 * Assembly/disassembly for converting to/from integral types.
546 * No shifting or overflow handled here.
547 */
548
549#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
550  do {						\
551    if (rsize <= _FP_W_TYPE_SIZE)		\
552      r = X##_f0;				\
553    else					\
554      {						\
555	r = X##_f1;				\
556	r <<= _FP_W_TYPE_SIZE;			\
557	r += X##_f0;				\
558      }						\
559  } while (0)
560
561#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
562  do {									\
563    X##_f0 = r;								\
564    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
565  } while (0)
566
567/*
568 * Convert FP values between word sizes
569 */
570
571#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)				\
572  do {									\
573    if (S##_c != FP_CLS_NAN)						\
574      _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),	\
575		     _FP_WFRACBITS_##sfs);				\
576    else								\
577      _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));	\
578    D##_f = S##_f0;							\
579  } while (0)
580
581#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)				\
582  do {									\
583    D##_f0 = S##_f;							\
584    D##_f1 = 0;								\
585    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
586  } while (0)
587
588#endif
589