extended.h revision 169689
1/* Software floating-point emulation.
2   Definitions for IEEE Extended Precision.
3   Copyright (C) 1999,2006 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   In addition to the permissions in the GNU Lesser General Public
13   License, the Free Software Foundation gives you unlimited
14   permission to link the compiled version of this file into
15   combinations with other programs, and to distribute those
16   combinations without any restriction coming from the use of this
17   file.  (The Lesser General Public License restrictions do apply in
18   other respects; for example, they cover modification of the file,
19   and distribution when not linked into a combine executable.)
20
21   The GNU C Library is distributed in the hope that it will be useful,
22   but WITHOUT ANY WARRANTY; without even the implied warranty of
23   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24   Lesser General Public License for more details.
25
26   You should have received a copy of the GNU Lesser General Public
27   License along with the GNU C Library; if not, write to the Free
28   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
29   MA 02110-1301, USA.  */
30
31#if _FP_W_TYPE_SIZE < 32
32#error "Here's a nickel, kid. Go buy yourself a real computer."
33#endif
34
35#if _FP_W_TYPE_SIZE < 64
36#define _FP_FRACTBITS_E         (4*_FP_W_TYPE_SIZE)
37#else
38#define _FP_FRACTBITS_E		(2*_FP_W_TYPE_SIZE)
39#endif
40
41#define _FP_FRACBITS_E		64
42#define _FP_FRACXBITS_E		(_FP_FRACTBITS_E - _FP_FRACBITS_E)
43#define _FP_WFRACBITS_E		(_FP_WORKBITS + _FP_FRACBITS_E)
44#define _FP_WFRACXBITS_E	(_FP_FRACTBITS_E - _FP_WFRACBITS_E)
45#define _FP_EXPBITS_E		15
46#define _FP_EXPBIAS_E		16383
47#define _FP_EXPMAX_E		32767
48
49#define _FP_QNANBIT_E		\
50	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
51#define _FP_QNANBIT_SH_E		\
52	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
53#define _FP_IMPLBIT_E		\
54	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
55#define _FP_IMPLBIT_SH_E		\
56	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
57#define _FP_OVERFLOW_E		\
58	((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
59
60typedef float XFtype __attribute__((mode(XF)));
61
62#if _FP_W_TYPE_SIZE < 64
63
64union _FP_UNION_E
65{
66   XFtype flt;
67   struct
68   {
69#if __BYTE_ORDER == __BIG_ENDIAN
70      unsigned long pad1 : _FP_W_TYPE_SIZE;
71      unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
72      unsigned long sign : 1;
73      unsigned long exp : _FP_EXPBITS_E;
74      unsigned long frac1 : _FP_W_TYPE_SIZE;
75      unsigned long frac0 : _FP_W_TYPE_SIZE;
76#else
77      unsigned long frac0 : _FP_W_TYPE_SIZE;
78      unsigned long frac1 : _FP_W_TYPE_SIZE;
79      unsigned exp : _FP_EXPBITS_E;
80      unsigned sign : 1;
81#endif /* not bigendian */
82   } bits __attribute__((packed));
83};
84
85
86#define FP_DECL_E(X)		_FP_DECL(4,X)
87
88#define FP_UNPACK_RAW_E(X, val)				\
89  do {							\
90    union _FP_UNION_E _flo; _flo.flt = (val);		\
91							\
92    X##_f[2] = 0; X##_f[3] = 0;				\
93    X##_f[0] = _flo.bits.frac0;				\
94    X##_f[1] = _flo.bits.frac1;				\
95    X##_e  = _flo.bits.exp;				\
96    X##_s  = _flo.bits.sign;				\
97    if (!X##_e && (X##_f[1] || X##_f[0])		\
98        && !(X##_f[1] & _FP_IMPLBIT_E))			\
99      {							\
100        X##_e++;					\
101        FP_SET_EXCEPTION(FP_EX_DENORM);			\
102      }							\
103  } while (0)
104
105#define FP_UNPACK_RAW_EP(X, val)			\
106  do {							\
107    union _FP_UNION_E *_flo =				\
108    (union _FP_UNION_E *)(val);				\
109							\
110    X##_f[2] = 0; X##_f[3] = 0;				\
111    X##_f[0] = _flo->bits.frac0;			\
112    X##_f[1] = _flo->bits.frac1;			\
113    X##_e  = _flo->bits.exp;				\
114    X##_s  = _flo->bits.sign;				\
115    if (!X##_e && (X##_f[1] || X##_f[0])		\
116        && !(X##_f[1] & _FP_IMPLBIT_E))			\
117      {							\
118        X##_e++;					\
119        FP_SET_EXCEPTION(FP_EX_DENORM);			\
120      }							\
121  } while (0)
122
123#define FP_PACK_RAW_E(val, X)				\
124  do {							\
125    union _FP_UNION_E _flo;				\
126							\
127    if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
128    else X##_f[1] &= ~(_FP_IMPLBIT_E);			\
129    _flo.bits.frac0 = X##_f[0];				\
130    _flo.bits.frac1 = X##_f[1];				\
131    _flo.bits.exp   = X##_e;				\
132    _flo.bits.sign  = X##_s;				\
133							\
134    (val) = _flo.flt;					\
135  } while (0)
136
137#define FP_PACK_RAW_EP(val, X)				\
138  do {							\
139    if (!FP_INHIBIT_RESULTS)				\
140      {							\
141	union _FP_UNION_E *_flo =			\
142	  (union _FP_UNION_E *)(val);			\
143							\
144	if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
145	else X##_f[1] &= ~(_FP_IMPLBIT_E);		\
146	_flo->bits.frac0 = X##_f[0];			\
147	_flo->bits.frac1 = X##_f[1];			\
148	_flo->bits.exp   = X##_e;			\
149	_flo->bits.sign  = X##_s;			\
150      }							\
151  } while (0)
152
153#define FP_UNPACK_E(X,val)		\
154  do {					\
155    FP_UNPACK_RAW_E(X,val);		\
156    _FP_UNPACK_CANONICAL(E,4,X);	\
157  } while (0)
158
159#define FP_UNPACK_EP(X,val)		\
160  do {					\
161    FP_UNPACK_RAW_EP(X,val);		\
162    _FP_UNPACK_CANONICAL(E,4,X);	\
163  } while (0)
164
165#define FP_UNPACK_SEMIRAW_E(X,val)	\
166  do {					\
167    _FP_UNPACK_RAW_E(X,val);		\
168    _FP_UNPACK_SEMIRAW(E,4,X);		\
169  } while (0)
170
171#define FP_UNPACK_SEMIRAW_EP(X,val)	\
172  do {					\
173    _FP_UNPACK_RAW_EP(X,val);		\
174    _FP_UNPACK_SEMIRAW(E,4,X);		\
175  } while (0)
176
177#define FP_PACK_E(val,X)		\
178  do {					\
179    _FP_PACK_CANONICAL(E,4,X);		\
180    FP_PACK_RAW_E(val,X);		\
181  } while (0)
182
183#define FP_PACK_EP(val,X)		\
184  do {					\
185    _FP_PACK_CANONICAL(E,4,X);		\
186    FP_PACK_RAW_EP(val,X);		\
187  } while (0)
188
189#define FP_PACK_SEMIRAW_E(val,X)	\
190  do {					\
191    _FP_PACK_SEMIRAW(E,4,X);		\
192    _FP_PACK_RAW_E(val,X);		\
193  } while (0)
194
195#define FP_PACK_SEMIRAW_EP(val,X)	\
196  do {					\
197    _FP_PACK_SEMIRAW(E,4,X);		\
198    _FP_PACK_RAW_EP(val,X);		\
199  } while (0)
200
201#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,4,X)
202#define FP_NEG_E(R,X)		_FP_NEG(E,4,R,X)
203#define FP_ADD_E(R,X,Y)		_FP_ADD(E,4,R,X,Y)
204#define FP_SUB_E(R,X,Y)		_FP_SUB(E,4,R,X,Y)
205#define FP_MUL_E(R,X,Y)		_FP_MUL(E,4,R,X,Y)
206#define FP_DIV_E(R,X,Y)		_FP_DIV(E,4,R,X,Y)
207#define FP_SQRT_E(R,X)		_FP_SQRT(E,4,R,X)
208
209/*
210 * Square root algorithms:
211 * We have just one right now, maybe Newton approximation
212 * should be added for those machines where division is fast.
213 * This has special _E version because standard _4 square
214 * root would not work (it has to start normally with the
215 * second word and not the first), but as we have to do it
216 * anyway, we optimize it by doing most of the calculations
217 * in two UWtype registers instead of four.
218 */
219
220#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
221  do {							\
222    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
223    _FP_FRAC_SRL_4(X, (_FP_WORKBITS));			\
224    while (q)						\
225      {							\
226	T##_f[1] = S##_f[1] + q;			\
227	if (T##_f[1] <= X##_f[1])			\
228	  {						\
229	    S##_f[1] = T##_f[1] + q;			\
230	    X##_f[1] -= T##_f[1];			\
231	    R##_f[1] += q;				\
232	  }						\
233	_FP_FRAC_SLL_2(X, 1);				\
234	q >>= 1;					\
235      }							\
236    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
237    while (q)						\
238      {							\
239	T##_f[0] = S##_f[0] + q;			\
240	T##_f[1] = S##_f[1];				\
241	if (T##_f[1] < X##_f[1] || 			\
242	    (T##_f[1] == X##_f[1] &&			\
243	     T##_f[0] <= X##_f[0]))			\
244	  {						\
245	    S##_f[0] = T##_f[0] + q;			\
246	    S##_f[1] += (T##_f[0] > S##_f[0]);		\
247	    _FP_FRAC_DEC_2(X, T);			\
248	    R##_f[0] += q;				\
249	  }						\
250	_FP_FRAC_SLL_2(X, 1);				\
251	q >>= 1;					\
252      }							\
253    _FP_FRAC_SLL_4(R, (_FP_WORKBITS));			\
254    if (X##_f[0] | X##_f[1])				\
255      {							\
256	if (S##_f[1] < X##_f[1] || 			\
257	    (S##_f[1] == X##_f[1] &&			\
258	     S##_f[0] < X##_f[0]))			\
259	  R##_f[0] |= _FP_WORK_ROUND;			\
260	R##_f[0] |= _FP_WORK_STICKY;			\
261      }							\
262  } while (0)
263
264#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,4,r,X,Y,un)
265#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,4,r,X,Y)
266#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,4,r,X,Y)
267
268#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,4,r,X,rsz,rsg)
269#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,4,X,r,rs,rt)
270
271#define _FP_FRAC_HIGH_E(X)	(X##_f[2])
272#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f[1])
273
274#else   /* not _FP_W_TYPE_SIZE < 64 */
275union _FP_UNION_E
276{
277  XFtype flt;
278  struct {
279#if __BYTE_ORDER == __BIG_ENDIAN
280    unsigned long pad : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
281    unsigned sign  : 1;
282    unsigned exp   : _FP_EXPBITS_E;
283    unsigned long frac : _FP_W_TYPE_SIZE;
284#else
285    unsigned long frac : _FP_W_TYPE_SIZE;
286    unsigned exp   : _FP_EXPBITS_E;
287    unsigned sign  : 1;
288#endif
289  } bits;
290};
291
292#define FP_DECL_E(X)		_FP_DECL(2,X)
293
294#define FP_UNPACK_RAW_E(X, val)					\
295  do {								\
296    union _FP_UNION_E _flo; _flo.flt = (val);			\
297								\
298    X##_f0 = _flo.bits.frac;					\
299    X##_f1 = 0;							\
300    X##_e = _flo.bits.exp;					\
301    X##_s = _flo.bits.sign;					\
302    if (!X##_e && X##_f0 && !(X##_f0 & _FP_IMPLBIT_E))		\
303      {								\
304        X##_e++;						\
305        FP_SET_EXCEPTION(FP_EX_DENORM);				\
306      }								\
307  } while (0)
308
309#define FP_UNPACK_RAW_EP(X, val)				\
310  do {								\
311    union _FP_UNION_E *_flo =					\
312      (union _FP_UNION_E *)(val);				\
313								\
314    X##_f0 = _flo->bits.frac;					\
315    X##_f1 = 0;							\
316    X##_e = _flo->bits.exp;					\
317    X##_s = _flo->bits.sign;					\
318    if (!X##_e && X##_f0 && !(X##_f0 & _FP_IMPLBIT_E))		\
319      {								\
320        X##_e++;						\
321        FP_SET_EXCEPTION(FP_EX_DENORM);				\
322      }								\
323  } while (0)
324
325#define FP_PACK_RAW_E(val, X)					\
326  do {								\
327    union _FP_UNION_E _flo;					\
328								\
329    if (X##_e) X##_f0 |= _FP_IMPLBIT_E;				\
330    else X##_f0 &= ~(_FP_IMPLBIT_E);				\
331    _flo.bits.frac = X##_f0;					\
332    _flo.bits.exp  = X##_e;					\
333    _flo.bits.sign = X##_s;					\
334								\
335    (val) = _flo.flt;						\
336  } while (0)
337
338#define FP_PACK_RAW_EP(fs, val, X)				\
339  do {								\
340    if (!FP_INHIBIT_RESULTS)					\
341      {								\
342	union _FP_UNION_E *_flo =				\
343	  (union _FP_UNION_E *)(val);				\
344								\
345	if (X##_e) X##_f0 |= _FP_IMPLBIT_E;			\
346	else X##_f0 &= ~(_FP_IMPLBIT_E);			\
347	_flo->bits.frac = X##_f0;				\
348	_flo->bits.exp  = X##_e;				\
349	_flo->bits.sign = X##_s;				\
350      }								\
351  } while (0)
352
353
354#define FP_UNPACK_E(X,val)		\
355  do {					\
356    FP_UNPACK_RAW_E(X,val);		\
357    _FP_UNPACK_CANONICAL(E,2,X);	\
358  } while (0)
359
360#define FP_UNPACK_EP(X,val)		\
361  do {					\
362    FP_UNPACK_RAW_EP(X,val);		\
363    _FP_UNPACK_CANONICAL(E,2,X);	\
364  } while (0)
365
366#define FP_UNPACK_SEMIRAW_E(X,val)	\
367  do {					\
368    _FP_UNPACK_RAW_E(X,val);		\
369    _FP_UNPACK_SEMIRAW(E,2,X);		\
370  } while (0)
371
372#define FP_UNPACK_SEMIRAW_EP(X,val)	\
373  do {					\
374    _FP_UNPACK_RAW_EP(X,val);		\
375    _FP_UNPACK_SEMIRAW(E,2,X);		\
376  } while (0)
377
378#define FP_PACK_E(val,X)		\
379  do {					\
380    _FP_PACK_CANONICAL(E,2,X);		\
381    FP_PACK_RAW_E(val,X);		\
382  } while (0)
383
384#define FP_PACK_EP(val,X)		\
385  do {					\
386    _FP_PACK_CANONICAL(E,2,X);		\
387    FP_PACK_RAW_EP(val,X);		\
388  } while (0)
389
390#define FP_PACK_SEMIRAW_E(val,X)	\
391  do {					\
392    _FP_PACK_SEMIRAW(E,2,X);		\
393    _FP_PACK_RAW_E(val,X);		\
394  } while (0)
395
396#define FP_PACK_SEMIRAW_EP(val,X)	\
397  do {					\
398    _FP_PACK_SEMIRAW(E,2,X);		\
399    _FP_PACK_RAW_EP(val,X);		\
400  } while (0)
401
402#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,2,X)
403#define FP_NEG_E(R,X)		_FP_NEG(E,2,R,X)
404#define FP_ADD_E(R,X,Y)		_FP_ADD(E,2,R,X,Y)
405#define FP_SUB_E(R,X,Y)		_FP_SUB(E,2,R,X,Y)
406#define FP_MUL_E(R,X,Y)		_FP_MUL(E,2,R,X,Y)
407#define FP_DIV_E(R,X,Y)		_FP_DIV(E,2,R,X,Y)
408#define FP_SQRT_E(R,X)		_FP_SQRT(E,2,R,X)
409
410/*
411 * Square root algorithms:
412 * We have just one right now, maybe Newton approximation
413 * should be added for those machines where division is fast.
414 * We optimize it by doing most of the calculations
415 * in one UWtype registers instead of two, although we don't
416 * have to.
417 */
418#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
419  do {							\
420    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
421    _FP_FRAC_SRL_2(X, (_FP_WORKBITS));			\
422    while (q)						\
423      {							\
424        T##_f0 = S##_f0 + q;				\
425        if (T##_f0 <= X##_f0)				\
426          {						\
427            S##_f0 = T##_f0 + q;			\
428            X##_f0 -= T##_f0;				\
429            R##_f0 += q;				\
430          }						\
431        _FP_FRAC_SLL_1(X, 1);				\
432        q >>= 1;					\
433      }							\
434    _FP_FRAC_SLL_2(R, (_FP_WORKBITS));			\
435    if (X##_f0)						\
436      {							\
437	if (S##_f0 < X##_f0)				\
438	  R##_f0 |= _FP_WORK_ROUND;			\
439	R##_f0 |= _FP_WORK_STICKY;			\
440      }							\
441  } while (0)
442
443#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,2,r,X,Y,un)
444#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,2,r,X,Y)
445#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,2,r,X,Y)
446
447#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,2,r,X,rsz,rsg)
448#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,2,X,r,rs,rt)
449
450#define _FP_FRAC_HIGH_E(X)	(X##_f1)
451#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f0)
452
453#endif /* not _FP_W_TYPE_SIZE < 64 */
454