1/* Software floating-point emulation.
2   Definitions for IEEE Extended Precision.
3   Copyright (C) 1999,2006,2007 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   In addition to the permissions in the GNU Lesser General Public
13   License, the Free Software Foundation gives you unlimited
14   permission to link the compiled version of this file into
15   combinations with other programs, and to distribute those
16   combinations without any restriction coming from the use of this
17   file.  (The Lesser General Public License restrictions do apply in
18   other respects; for example, they cover modification of the file,
19   and distribution when not linked into a combine executable.)
20
21   The GNU C Library is distributed in the hope that it will be useful,
22   but WITHOUT ANY WARRANTY; without even the implied warranty of
23   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24   Lesser General Public License for more details.
25
26   You should have received a copy of the GNU Lesser General Public
27   License along with the GNU C Library; if not, write to the Free
28   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
29   MA 02110-1301, USA.  */
30
31#if _FP_W_TYPE_SIZE < 32
32#error "Here's a nickel, kid. Go buy yourself a real computer."
33#endif
34
35#if _FP_W_TYPE_SIZE < 64
36#define _FP_FRACTBITS_E         (4*_FP_W_TYPE_SIZE)
37#else
38#define _FP_FRACTBITS_E		(2*_FP_W_TYPE_SIZE)
39#endif
40
41#define _FP_FRACBITS_E		64
42#define _FP_FRACXBITS_E		(_FP_FRACTBITS_E - _FP_FRACBITS_E)
43#define _FP_WFRACBITS_E		(_FP_WORKBITS + _FP_FRACBITS_E)
44#define _FP_WFRACXBITS_E	(_FP_FRACTBITS_E - _FP_WFRACBITS_E)
45#define _FP_EXPBITS_E		15
46#define _FP_EXPBIAS_E		16383
47#define _FP_EXPMAX_E		32767
48
49#define _FP_QNANBIT_E		\
50	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
51#define _FP_QNANBIT_SH_E		\
52	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
53#define _FP_IMPLBIT_E		\
54	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
55#define _FP_IMPLBIT_SH_E		\
56	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
57#define _FP_OVERFLOW_E		\
58	((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
59
60typedef float XFtype __attribute__((mode(XF)));
61
62#if _FP_W_TYPE_SIZE < 64
63
64union _FP_UNION_E
65{
66   XFtype flt;
67   struct
68   {
69#if __BYTE_ORDER == __BIG_ENDIAN
70      unsigned long pad1 : _FP_W_TYPE_SIZE;
71      unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
72      unsigned long sign : 1;
73      unsigned long exp : _FP_EXPBITS_E;
74      unsigned long frac1 : _FP_W_TYPE_SIZE;
75      unsigned long frac0 : _FP_W_TYPE_SIZE;
76#else
77      unsigned long frac0 : _FP_W_TYPE_SIZE;
78      unsigned long frac1 : _FP_W_TYPE_SIZE;
79      unsigned exp : _FP_EXPBITS_E;
80      unsigned sign : 1;
81#endif /* not bigendian */
82   } bits __attribute__((packed));
83};
84
85
86#define FP_DECL_E(X)		_FP_DECL(4,X)
87
88#define FP_UNPACK_RAW_E(X, val)				\
89  do {							\
90    union _FP_UNION_E _flo; _flo.flt = (val);		\
91							\
92    X##_f[2] = 0; X##_f[3] = 0;				\
93    X##_f[0] = _flo.bits.frac0;				\
94    X##_f[1] = _flo.bits.frac1;				\
95    X##_e  = _flo.bits.exp;				\
96    X##_s  = _flo.bits.sign;				\
97  } while (0)
98
99#define FP_UNPACK_RAW_EP(X, val)			\
100  do {							\
101    union _FP_UNION_E *_flo =				\
102    (union _FP_UNION_E *)(val);				\
103							\
104    X##_f[2] = 0; X##_f[3] = 0;				\
105    X##_f[0] = _flo->bits.frac0;			\
106    X##_f[1] = _flo->bits.frac1;			\
107    X##_e  = _flo->bits.exp;				\
108    X##_s  = _flo->bits.sign;				\
109  } while (0)
110
111#define FP_PACK_RAW_E(val, X)				\
112  do {							\
113    union _FP_UNION_E _flo;				\
114							\
115    if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
116    else X##_f[1] &= ~(_FP_IMPLBIT_E);			\
117    _flo.bits.frac0 = X##_f[0];				\
118    _flo.bits.frac1 = X##_f[1];				\
119    _flo.bits.exp   = X##_e;				\
120    _flo.bits.sign  = X##_s;				\
121							\
122    (val) = _flo.flt;					\
123  } while (0)
124
125#define FP_PACK_RAW_EP(val, X)				\
126  do {							\
127    if (!FP_INHIBIT_RESULTS)				\
128      {							\
129	union _FP_UNION_E *_flo =			\
130	  (union _FP_UNION_E *)(val);			\
131							\
132	if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
133	else X##_f[1] &= ~(_FP_IMPLBIT_E);		\
134	_flo->bits.frac0 = X##_f[0];			\
135	_flo->bits.frac1 = X##_f[1];			\
136	_flo->bits.exp   = X##_e;			\
137	_flo->bits.sign  = X##_s;			\
138      }							\
139  } while (0)
140
141#define FP_UNPACK_E(X,val)		\
142  do {					\
143    FP_UNPACK_RAW_E(X,val);		\
144    _FP_UNPACK_CANONICAL(E,4,X);	\
145  } while (0)
146
147#define FP_UNPACK_EP(X,val)		\
148  do {					\
149    FP_UNPACK_RAW_EP(X,val);		\
150    _FP_UNPACK_CANONICAL(E,4,X);	\
151  } while (0)
152
153#define FP_UNPACK_SEMIRAW_E(X,val)	\
154  do {					\
155    FP_UNPACK_RAW_E(X,val);		\
156    _FP_UNPACK_SEMIRAW(E,4,X);		\
157  } while (0)
158
159#define FP_UNPACK_SEMIRAW_EP(X,val)	\
160  do {					\
161    FP_UNPACK_RAW_EP(X,val);		\
162    _FP_UNPACK_SEMIRAW(E,4,X);		\
163  } while (0)
164
165#define FP_PACK_E(val,X)		\
166  do {					\
167    _FP_PACK_CANONICAL(E,4,X);		\
168    FP_PACK_RAW_E(val,X);		\
169  } while (0)
170
171#define FP_PACK_EP(val,X)		\
172  do {					\
173    _FP_PACK_CANONICAL(E,4,X);		\
174    FP_PACK_RAW_EP(val,X);		\
175  } while (0)
176
177#define FP_PACK_SEMIRAW_E(val,X)	\
178  do {					\
179    _FP_PACK_SEMIRAW(E,4,X);		\
180    FP_PACK_RAW_E(val,X);		\
181  } while (0)
182
183#define FP_PACK_SEMIRAW_EP(val,X)	\
184  do {					\
185    _FP_PACK_SEMIRAW(E,4,X);		\
186    FP_PACK_RAW_EP(val,X);		\
187  } while (0)
188
189#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,4,X)
190#define FP_NEG_E(R,X)		_FP_NEG(E,4,R,X)
191#define FP_ADD_E(R,X,Y)		_FP_ADD(E,4,R,X,Y)
192#define FP_SUB_E(R,X,Y)		_FP_SUB(E,4,R,X,Y)
193#define FP_MUL_E(R,X,Y)		_FP_MUL(E,4,R,X,Y)
194#define FP_DIV_E(R,X,Y)		_FP_DIV(E,4,R,X,Y)
195#define FP_SQRT_E(R,X)		_FP_SQRT(E,4,R,X)
196
197/*
198 * Square root algorithms:
199 * We have just one right now, maybe Newton approximation
200 * should be added for those machines where division is fast.
201 * This has special _E version because standard _4 square
202 * root would not work (it has to start normally with the
203 * second word and not the first), but as we have to do it
204 * anyway, we optimize it by doing most of the calculations
205 * in two UWtype registers instead of four.
206 */
207
208#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
209  do {							\
210    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
211    _FP_FRAC_SRL_4(X, (_FP_WORKBITS));			\
212    while (q)						\
213      {							\
214	T##_f[1] = S##_f[1] + q;			\
215	if (T##_f[1] <= X##_f[1])			\
216	  {						\
217	    S##_f[1] = T##_f[1] + q;			\
218	    X##_f[1] -= T##_f[1];			\
219	    R##_f[1] += q;				\
220	  }						\
221	_FP_FRAC_SLL_2(X, 1);				\
222	q >>= 1;					\
223      }							\
224    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
225    while (q)						\
226      {							\
227	T##_f[0] = S##_f[0] + q;			\
228	T##_f[1] = S##_f[1];				\
229	if (T##_f[1] < X##_f[1] || 			\
230	    (T##_f[1] == X##_f[1] &&			\
231	     T##_f[0] <= X##_f[0]))			\
232	  {						\
233	    S##_f[0] = T##_f[0] + q;			\
234	    S##_f[1] += (T##_f[0] > S##_f[0]);		\
235	    _FP_FRAC_DEC_2(X, T);			\
236	    R##_f[0] += q;				\
237	  }						\
238	_FP_FRAC_SLL_2(X, 1);				\
239	q >>= 1;					\
240      }							\
241    _FP_FRAC_SLL_4(R, (_FP_WORKBITS));			\
242    if (X##_f[0] | X##_f[1])				\
243      {							\
244	if (S##_f[1] < X##_f[1] || 			\
245	    (S##_f[1] == X##_f[1] &&			\
246	     S##_f[0] < X##_f[0]))			\
247	  R##_f[0] |= _FP_WORK_ROUND;			\
248	R##_f[0] |= _FP_WORK_STICKY;			\
249      }							\
250  } while (0)
251
252#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,4,r,X,Y,un)
253#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,4,r,X,Y)
254#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,4,r,X,Y)
255
256#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,4,r,X,rsz,rsg)
257#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,4,X,r,rs,rt)
258
259#define _FP_FRAC_HIGH_E(X)	(X##_f[2])
260#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f[1])
261
262#else   /* not _FP_W_TYPE_SIZE < 64 */
263union _FP_UNION_E
264{
265  XFtype flt;
266  struct {
267#if __BYTE_ORDER == __BIG_ENDIAN
268    _FP_W_TYPE pad  : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
269    unsigned sign   : 1;
270    unsigned exp    : _FP_EXPBITS_E;
271    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
272#else
273    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
274    unsigned exp    : _FP_EXPBITS_E;
275    unsigned sign   : 1;
276#endif
277  } bits;
278};
279
280#define FP_DECL_E(X)		_FP_DECL(2,X)
281
282#define FP_UNPACK_RAW_E(X, val)					\
283  do {								\
284    union _FP_UNION_E _flo; _flo.flt = (val);			\
285								\
286    X##_f0 = _flo.bits.frac;					\
287    X##_f1 = 0;							\
288    X##_e = _flo.bits.exp;					\
289    X##_s = _flo.bits.sign;					\
290  } while (0)
291
292#define FP_UNPACK_RAW_EP(X, val)				\
293  do {								\
294    union _FP_UNION_E *_flo =					\
295      (union _FP_UNION_E *)(val);				\
296								\
297    X##_f0 = _flo->bits.frac;					\
298    X##_f1 = 0;							\
299    X##_e = _flo->bits.exp;					\
300    X##_s = _flo->bits.sign;					\
301  } while (0)
302
303#define FP_PACK_RAW_E(val, X)					\
304  do {								\
305    union _FP_UNION_E _flo;					\
306								\
307    if (X##_e) X##_f0 |= _FP_IMPLBIT_E;				\
308    else X##_f0 &= ~(_FP_IMPLBIT_E);				\
309    _flo.bits.frac = X##_f0;					\
310    _flo.bits.exp  = X##_e;					\
311    _flo.bits.sign = X##_s;					\
312								\
313    (val) = _flo.flt;						\
314  } while (0)
315
316#define FP_PACK_RAW_EP(fs, val, X)				\
317  do {								\
318    if (!FP_INHIBIT_RESULTS)					\
319      {								\
320	union _FP_UNION_E *_flo =				\
321	  (union _FP_UNION_E *)(val);				\
322								\
323	if (X##_e) X##_f0 |= _FP_IMPLBIT_E;			\
324	else X##_f0 &= ~(_FP_IMPLBIT_E);			\
325	_flo->bits.frac = X##_f0;				\
326	_flo->bits.exp  = X##_e;				\
327	_flo->bits.sign = X##_s;				\
328      }								\
329  } while (0)
330
331
332#define FP_UNPACK_E(X,val)		\
333  do {					\
334    FP_UNPACK_RAW_E(X,val);		\
335    _FP_UNPACK_CANONICAL(E,2,X);	\
336  } while (0)
337
338#define FP_UNPACK_EP(X,val)		\
339  do {					\
340    FP_UNPACK_RAW_EP(X,val);		\
341    _FP_UNPACK_CANONICAL(E,2,X);	\
342  } while (0)
343
344#define FP_UNPACK_SEMIRAW_E(X,val)	\
345  do {					\
346    FP_UNPACK_RAW_E(X,val);		\
347    _FP_UNPACK_SEMIRAW(E,2,X);		\
348  } while (0)
349
350#define FP_UNPACK_SEMIRAW_EP(X,val)	\
351  do {					\
352    FP_UNPACK_RAW_EP(X,val);		\
353    _FP_UNPACK_SEMIRAW(E,2,X);		\
354  } while (0)
355
356#define FP_PACK_E(val,X)		\
357  do {					\
358    _FP_PACK_CANONICAL(E,2,X);		\
359    FP_PACK_RAW_E(val,X);		\
360  } while (0)
361
362#define FP_PACK_EP(val,X)		\
363  do {					\
364    _FP_PACK_CANONICAL(E,2,X);		\
365    FP_PACK_RAW_EP(val,X);		\
366  } while (0)
367
368#define FP_PACK_SEMIRAW_E(val,X)	\
369  do {					\
370    _FP_PACK_SEMIRAW(E,2,X);		\
371    FP_PACK_RAW_E(val,X);		\
372  } while (0)
373
374#define FP_PACK_SEMIRAW_EP(val,X)	\
375  do {					\
376    _FP_PACK_SEMIRAW(E,2,X);		\
377    FP_PACK_RAW_EP(val,X);		\
378  } while (0)
379
380#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,2,X)
381#define FP_NEG_E(R,X)		_FP_NEG(E,2,R,X)
382#define FP_ADD_E(R,X,Y)		_FP_ADD(E,2,R,X,Y)
383#define FP_SUB_E(R,X,Y)		_FP_SUB(E,2,R,X,Y)
384#define FP_MUL_E(R,X,Y)		_FP_MUL(E,2,R,X,Y)
385#define FP_DIV_E(R,X,Y)		_FP_DIV(E,2,R,X,Y)
386#define FP_SQRT_E(R,X)		_FP_SQRT(E,2,R,X)
387
388/*
389 * Square root algorithms:
390 * We have just one right now, maybe Newton approximation
391 * should be added for those machines where division is fast.
392 * We optimize it by doing most of the calculations
393 * in one UWtype registers instead of two, although we don't
394 * have to.
395 */
396#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
397  do {							\
398    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
399    _FP_FRAC_SRL_2(X, (_FP_WORKBITS));			\
400    while (q)						\
401      {							\
402        T##_f0 = S##_f0 + q;				\
403        if (T##_f0 <= X##_f0)				\
404          {						\
405            S##_f0 = T##_f0 + q;			\
406            X##_f0 -= T##_f0;				\
407            R##_f0 += q;				\
408          }						\
409        _FP_FRAC_SLL_1(X, 1);				\
410        q >>= 1;					\
411      }							\
412    _FP_FRAC_SLL_2(R, (_FP_WORKBITS));			\
413    if (X##_f0)						\
414      {							\
415	if (S##_f0 < X##_f0)				\
416	  R##_f0 |= _FP_WORK_ROUND;			\
417	R##_f0 |= _FP_WORK_STICKY;			\
418      }							\
419  } while (0)
420
421#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,2,r,X,Y,un)
422#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,2,r,X,Y)
423#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,2,r,X,Y)
424
425#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,2,r,X,rsz,rsg)
426#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,2,X,r,rs,rt)
427
428#define _FP_FRAC_HIGH_E(X)	(X##_f1)
429#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f0)
430
431#endif /* not _FP_W_TYPE_SIZE < 64 */
432