1169689Skan/* Software floating-point emulation.
2169689Skan   Definitions for IEEE Extended Precision.
3171825Skan   Copyright (C) 1999,2006,2007 Free Software Foundation, Inc.
4169689Skan   This file is part of the GNU C Library.
5169689Skan   Contributed by Jakub Jelinek (jj@ultra.linux.cz).
6169689Skan
7169689Skan   The GNU C Library is free software; you can redistribute it and/or
8169689Skan   modify it under the terms of the GNU Lesser General Public
9169689Skan   License as published by the Free Software Foundation; either
10169689Skan   version 2.1 of the License, or (at your option) any later version.
11169689Skan
12169689Skan   In addition to the permissions in the GNU Lesser General Public
13169689Skan   License, the Free Software Foundation gives you unlimited
14169689Skan   permission to link the compiled version of this file into
15169689Skan   combinations with other programs, and to distribute those
16169689Skan   combinations without any restriction coming from the use of this
17169689Skan   file.  (The Lesser General Public License restrictions do apply in
18169689Skan   other respects; for example, they cover modification of the file,
19169689Skan   and distribution when not linked into a combine executable.)
20169689Skan
21169689Skan   The GNU C Library is distributed in the hope that it will be useful,
22169689Skan   but WITHOUT ANY WARRANTY; without even the implied warranty of
23169689Skan   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
24169689Skan   Lesser General Public License for more details.
25169689Skan
26169689Skan   You should have received a copy of the GNU Lesser General Public
27169689Skan   License along with the GNU C Library; if not, write to the Free
28169689Skan   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
29169689Skan   MA 02110-1301, USA.  */
30169689Skan
31169689Skan#if _FP_W_TYPE_SIZE < 32
32169689Skan#error "Here's a nickel, kid. Go buy yourself a real computer."
33169689Skan#endif
34169689Skan
35169689Skan#if _FP_W_TYPE_SIZE < 64
36169689Skan#define _FP_FRACTBITS_E         (4*_FP_W_TYPE_SIZE)
37169689Skan#else
38169689Skan#define _FP_FRACTBITS_E		(2*_FP_W_TYPE_SIZE)
39169689Skan#endif
40169689Skan
41169689Skan#define _FP_FRACBITS_E		64
42169689Skan#define _FP_FRACXBITS_E		(_FP_FRACTBITS_E - _FP_FRACBITS_E)
43169689Skan#define _FP_WFRACBITS_E		(_FP_WORKBITS + _FP_FRACBITS_E)
44169689Skan#define _FP_WFRACXBITS_E	(_FP_FRACTBITS_E - _FP_WFRACBITS_E)
45169689Skan#define _FP_EXPBITS_E		15
46169689Skan#define _FP_EXPBIAS_E		16383
47169689Skan#define _FP_EXPMAX_E		32767
48169689Skan
49169689Skan#define _FP_QNANBIT_E		\
50169689Skan	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2) % _FP_W_TYPE_SIZE)
51169689Skan#define _FP_QNANBIT_SH_E		\
52169689Skan	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-2+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
53169689Skan#define _FP_IMPLBIT_E		\
54169689Skan	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1) % _FP_W_TYPE_SIZE)
55169689Skan#define _FP_IMPLBIT_SH_E		\
56169689Skan	((_FP_W_TYPE)1 << (_FP_FRACBITS_E-1+_FP_WORKBITS) % _FP_W_TYPE_SIZE)
57169689Skan#define _FP_OVERFLOW_E		\
58169689Skan	((_FP_W_TYPE)1 << (_FP_WFRACBITS_E % _FP_W_TYPE_SIZE))
59169689Skan
60169689Skantypedef float XFtype __attribute__((mode(XF)));
61169689Skan
62169689Skan#if _FP_W_TYPE_SIZE < 64
63169689Skan
64169689Skanunion _FP_UNION_E
65169689Skan{
66169689Skan   XFtype flt;
67169689Skan   struct
68169689Skan   {
69169689Skan#if __BYTE_ORDER == __BIG_ENDIAN
70169689Skan      unsigned long pad1 : _FP_W_TYPE_SIZE;
71169689Skan      unsigned long pad2 : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
72169689Skan      unsigned long sign : 1;
73169689Skan      unsigned long exp : _FP_EXPBITS_E;
74169689Skan      unsigned long frac1 : _FP_W_TYPE_SIZE;
75169689Skan      unsigned long frac0 : _FP_W_TYPE_SIZE;
76169689Skan#else
77169689Skan      unsigned long frac0 : _FP_W_TYPE_SIZE;
78169689Skan      unsigned long frac1 : _FP_W_TYPE_SIZE;
79169689Skan      unsigned exp : _FP_EXPBITS_E;
80169689Skan      unsigned sign : 1;
81169689Skan#endif /* not bigendian */
82169689Skan   } bits __attribute__((packed));
83169689Skan};
84169689Skan
85169689Skan
86169689Skan#define FP_DECL_E(X)		_FP_DECL(4,X)
87169689Skan
88169689Skan#define FP_UNPACK_RAW_E(X, val)				\
89169689Skan  do {							\
90169689Skan    union _FP_UNION_E _flo; _flo.flt = (val);		\
91169689Skan							\
92169689Skan    X##_f[2] = 0; X##_f[3] = 0;				\
93169689Skan    X##_f[0] = _flo.bits.frac0;				\
94169689Skan    X##_f[1] = _flo.bits.frac1;				\
95169689Skan    X##_e  = _flo.bits.exp;				\
96169689Skan    X##_s  = _flo.bits.sign;				\
97169689Skan  } while (0)
98169689Skan
99169689Skan#define FP_UNPACK_RAW_EP(X, val)			\
100169689Skan  do {							\
101169689Skan    union _FP_UNION_E *_flo =				\
102169689Skan    (union _FP_UNION_E *)(val);				\
103169689Skan							\
104169689Skan    X##_f[2] = 0; X##_f[3] = 0;				\
105169689Skan    X##_f[0] = _flo->bits.frac0;			\
106169689Skan    X##_f[1] = _flo->bits.frac1;			\
107169689Skan    X##_e  = _flo->bits.exp;				\
108169689Skan    X##_s  = _flo->bits.sign;				\
109169689Skan  } while (0)
110169689Skan
111169689Skan#define FP_PACK_RAW_E(val, X)				\
112169689Skan  do {							\
113169689Skan    union _FP_UNION_E _flo;				\
114169689Skan							\
115169689Skan    if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
116169689Skan    else X##_f[1] &= ~(_FP_IMPLBIT_E);			\
117169689Skan    _flo.bits.frac0 = X##_f[0];				\
118169689Skan    _flo.bits.frac1 = X##_f[1];				\
119169689Skan    _flo.bits.exp   = X##_e;				\
120169689Skan    _flo.bits.sign  = X##_s;				\
121169689Skan							\
122169689Skan    (val) = _flo.flt;					\
123169689Skan  } while (0)
124169689Skan
125169689Skan#define FP_PACK_RAW_EP(val, X)				\
126169689Skan  do {							\
127169689Skan    if (!FP_INHIBIT_RESULTS)				\
128169689Skan      {							\
129169689Skan	union _FP_UNION_E *_flo =			\
130169689Skan	  (union _FP_UNION_E *)(val);			\
131169689Skan							\
132169689Skan	if (X##_e) X##_f[1] |= _FP_IMPLBIT_E;		\
133169689Skan	else X##_f[1] &= ~(_FP_IMPLBIT_E);		\
134169689Skan	_flo->bits.frac0 = X##_f[0];			\
135169689Skan	_flo->bits.frac1 = X##_f[1];			\
136169689Skan	_flo->bits.exp   = X##_e;			\
137169689Skan	_flo->bits.sign  = X##_s;			\
138169689Skan      }							\
139169689Skan  } while (0)
140169689Skan
141169689Skan#define FP_UNPACK_E(X,val)		\
142169689Skan  do {					\
143169689Skan    FP_UNPACK_RAW_E(X,val);		\
144169689Skan    _FP_UNPACK_CANONICAL(E,4,X);	\
145169689Skan  } while (0)
146169689Skan
147169689Skan#define FP_UNPACK_EP(X,val)		\
148169689Skan  do {					\
149169689Skan    FP_UNPACK_RAW_EP(X,val);		\
150169689Skan    _FP_UNPACK_CANONICAL(E,4,X);	\
151169689Skan  } while (0)
152169689Skan
153169689Skan#define FP_UNPACK_SEMIRAW_E(X,val)	\
154169689Skan  do {					\
155171825Skan    FP_UNPACK_RAW_E(X,val);		\
156169689Skan    _FP_UNPACK_SEMIRAW(E,4,X);		\
157169689Skan  } while (0)
158169689Skan
159169689Skan#define FP_UNPACK_SEMIRAW_EP(X,val)	\
160169689Skan  do {					\
161171825Skan    FP_UNPACK_RAW_EP(X,val);		\
162169689Skan    _FP_UNPACK_SEMIRAW(E,4,X);		\
163169689Skan  } while (0)
164169689Skan
165169689Skan#define FP_PACK_E(val,X)		\
166169689Skan  do {					\
167169689Skan    _FP_PACK_CANONICAL(E,4,X);		\
168169689Skan    FP_PACK_RAW_E(val,X);		\
169169689Skan  } while (0)
170169689Skan
171169689Skan#define FP_PACK_EP(val,X)		\
172169689Skan  do {					\
173169689Skan    _FP_PACK_CANONICAL(E,4,X);		\
174169689Skan    FP_PACK_RAW_EP(val,X);		\
175169689Skan  } while (0)
176169689Skan
177169689Skan#define FP_PACK_SEMIRAW_E(val,X)	\
178169689Skan  do {					\
179169689Skan    _FP_PACK_SEMIRAW(E,4,X);		\
180171825Skan    FP_PACK_RAW_E(val,X);		\
181169689Skan  } while (0)
182169689Skan
183169689Skan#define FP_PACK_SEMIRAW_EP(val,X)	\
184169689Skan  do {					\
185169689Skan    _FP_PACK_SEMIRAW(E,4,X);		\
186171825Skan    FP_PACK_RAW_EP(val,X);		\
187169689Skan  } while (0)
188169689Skan
189169689Skan#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,4,X)
190169689Skan#define FP_NEG_E(R,X)		_FP_NEG(E,4,R,X)
191169689Skan#define FP_ADD_E(R,X,Y)		_FP_ADD(E,4,R,X,Y)
192169689Skan#define FP_SUB_E(R,X,Y)		_FP_SUB(E,4,R,X,Y)
193169689Skan#define FP_MUL_E(R,X,Y)		_FP_MUL(E,4,R,X,Y)
194169689Skan#define FP_DIV_E(R,X,Y)		_FP_DIV(E,4,R,X,Y)
195169689Skan#define FP_SQRT_E(R,X)		_FP_SQRT(E,4,R,X)
196169689Skan
197169689Skan/*
198169689Skan * Square root algorithms:
199169689Skan * We have just one right now, maybe Newton approximation
200169689Skan * should be added for those machines where division is fast.
201169689Skan * This has special _E version because standard _4 square
202169689Skan * root would not work (it has to start normally with the
203169689Skan * second word and not the first), but as we have to do it
204169689Skan * anyway, we optimize it by doing most of the calculations
205169689Skan * in two UWtype registers instead of four.
206169689Skan */
207169689Skan
208169689Skan#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
209169689Skan  do {							\
210169689Skan    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
211169689Skan    _FP_FRAC_SRL_4(X, (_FP_WORKBITS));			\
212169689Skan    while (q)						\
213169689Skan      {							\
214169689Skan	T##_f[1] = S##_f[1] + q;			\
215169689Skan	if (T##_f[1] <= X##_f[1])			\
216169689Skan	  {						\
217169689Skan	    S##_f[1] = T##_f[1] + q;			\
218169689Skan	    X##_f[1] -= T##_f[1];			\
219169689Skan	    R##_f[1] += q;				\
220169689Skan	  }						\
221169689Skan	_FP_FRAC_SLL_2(X, 1);				\
222169689Skan	q >>= 1;					\
223169689Skan      }							\
224169689Skan    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
225169689Skan    while (q)						\
226169689Skan      {							\
227169689Skan	T##_f[0] = S##_f[0] + q;			\
228169689Skan	T##_f[1] = S##_f[1];				\
229169689Skan	if (T##_f[1] < X##_f[1] || 			\
230169689Skan	    (T##_f[1] == X##_f[1] &&			\
231169689Skan	     T##_f[0] <= X##_f[0]))			\
232169689Skan	  {						\
233169689Skan	    S##_f[0] = T##_f[0] + q;			\
234169689Skan	    S##_f[1] += (T##_f[0] > S##_f[0]);		\
235169689Skan	    _FP_FRAC_DEC_2(X, T);			\
236169689Skan	    R##_f[0] += q;				\
237169689Skan	  }						\
238169689Skan	_FP_FRAC_SLL_2(X, 1);				\
239169689Skan	q >>= 1;					\
240169689Skan      }							\
241169689Skan    _FP_FRAC_SLL_4(R, (_FP_WORKBITS));			\
242169689Skan    if (X##_f[0] | X##_f[1])				\
243169689Skan      {							\
244169689Skan	if (S##_f[1] < X##_f[1] || 			\
245169689Skan	    (S##_f[1] == X##_f[1] &&			\
246169689Skan	     S##_f[0] < X##_f[0]))			\
247169689Skan	  R##_f[0] |= _FP_WORK_ROUND;			\
248169689Skan	R##_f[0] |= _FP_WORK_STICKY;			\
249169689Skan      }							\
250169689Skan  } while (0)
251169689Skan
252169689Skan#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,4,r,X,Y,un)
253169689Skan#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,4,r,X,Y)
254169689Skan#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,4,r,X,Y)
255169689Skan
256169689Skan#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,4,r,X,rsz,rsg)
257169689Skan#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,4,X,r,rs,rt)
258169689Skan
259169689Skan#define _FP_FRAC_HIGH_E(X)	(X##_f[2])
260169689Skan#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f[1])
261169689Skan
262169689Skan#else   /* not _FP_W_TYPE_SIZE < 64 */
263169689Skanunion _FP_UNION_E
264169689Skan{
265169689Skan  XFtype flt;
266169689Skan  struct {
267169689Skan#if __BYTE_ORDER == __BIG_ENDIAN
268171825Skan    _FP_W_TYPE pad  : (_FP_W_TYPE_SIZE - 1 - _FP_EXPBITS_E);
269171825Skan    unsigned sign   : 1;
270171825Skan    unsigned exp    : _FP_EXPBITS_E;
271171825Skan    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
272169689Skan#else
273171825Skan    _FP_W_TYPE frac : _FP_W_TYPE_SIZE;
274171825Skan    unsigned exp    : _FP_EXPBITS_E;
275171825Skan    unsigned sign   : 1;
276169689Skan#endif
277169689Skan  } bits;
278169689Skan};
279169689Skan
280169689Skan#define FP_DECL_E(X)		_FP_DECL(2,X)
281169689Skan
282169689Skan#define FP_UNPACK_RAW_E(X, val)					\
283169689Skan  do {								\
284169689Skan    union _FP_UNION_E _flo; _flo.flt = (val);			\
285169689Skan								\
286169689Skan    X##_f0 = _flo.bits.frac;					\
287169689Skan    X##_f1 = 0;							\
288169689Skan    X##_e = _flo.bits.exp;					\
289169689Skan    X##_s = _flo.bits.sign;					\
290169689Skan  } while (0)
291169689Skan
292169689Skan#define FP_UNPACK_RAW_EP(X, val)				\
293169689Skan  do {								\
294169689Skan    union _FP_UNION_E *_flo =					\
295169689Skan      (union _FP_UNION_E *)(val);				\
296169689Skan								\
297169689Skan    X##_f0 = _flo->bits.frac;					\
298169689Skan    X##_f1 = 0;							\
299169689Skan    X##_e = _flo->bits.exp;					\
300169689Skan    X##_s = _flo->bits.sign;					\
301169689Skan  } while (0)
302169689Skan
303169689Skan#define FP_PACK_RAW_E(val, X)					\
304169689Skan  do {								\
305169689Skan    union _FP_UNION_E _flo;					\
306169689Skan								\
307169689Skan    if (X##_e) X##_f0 |= _FP_IMPLBIT_E;				\
308169689Skan    else X##_f0 &= ~(_FP_IMPLBIT_E);				\
309169689Skan    _flo.bits.frac = X##_f0;					\
310169689Skan    _flo.bits.exp  = X##_e;					\
311169689Skan    _flo.bits.sign = X##_s;					\
312169689Skan								\
313169689Skan    (val) = _flo.flt;						\
314169689Skan  } while (0)
315169689Skan
316169689Skan#define FP_PACK_RAW_EP(fs, val, X)				\
317169689Skan  do {								\
318169689Skan    if (!FP_INHIBIT_RESULTS)					\
319169689Skan      {								\
320169689Skan	union _FP_UNION_E *_flo =				\
321169689Skan	  (union _FP_UNION_E *)(val);				\
322169689Skan								\
323169689Skan	if (X##_e) X##_f0 |= _FP_IMPLBIT_E;			\
324169689Skan	else X##_f0 &= ~(_FP_IMPLBIT_E);			\
325169689Skan	_flo->bits.frac = X##_f0;				\
326169689Skan	_flo->bits.exp  = X##_e;				\
327169689Skan	_flo->bits.sign = X##_s;				\
328169689Skan      }								\
329169689Skan  } while (0)
330169689Skan
331169689Skan
332169689Skan#define FP_UNPACK_E(X,val)		\
333169689Skan  do {					\
334169689Skan    FP_UNPACK_RAW_E(X,val);		\
335169689Skan    _FP_UNPACK_CANONICAL(E,2,X);	\
336169689Skan  } while (0)
337169689Skan
338169689Skan#define FP_UNPACK_EP(X,val)		\
339169689Skan  do {					\
340169689Skan    FP_UNPACK_RAW_EP(X,val);		\
341169689Skan    _FP_UNPACK_CANONICAL(E,2,X);	\
342169689Skan  } while (0)
343169689Skan
344169689Skan#define FP_UNPACK_SEMIRAW_E(X,val)	\
345169689Skan  do {					\
346171825Skan    FP_UNPACK_RAW_E(X,val);		\
347169689Skan    _FP_UNPACK_SEMIRAW(E,2,X);		\
348169689Skan  } while (0)
349169689Skan
350169689Skan#define FP_UNPACK_SEMIRAW_EP(X,val)	\
351169689Skan  do {					\
352171825Skan    FP_UNPACK_RAW_EP(X,val);		\
353169689Skan    _FP_UNPACK_SEMIRAW(E,2,X);		\
354169689Skan  } while (0)
355169689Skan
356169689Skan#define FP_PACK_E(val,X)		\
357169689Skan  do {					\
358169689Skan    _FP_PACK_CANONICAL(E,2,X);		\
359169689Skan    FP_PACK_RAW_E(val,X);		\
360169689Skan  } while (0)
361169689Skan
362169689Skan#define FP_PACK_EP(val,X)		\
363169689Skan  do {					\
364169689Skan    _FP_PACK_CANONICAL(E,2,X);		\
365169689Skan    FP_PACK_RAW_EP(val,X);		\
366169689Skan  } while (0)
367169689Skan
368169689Skan#define FP_PACK_SEMIRAW_E(val,X)	\
369169689Skan  do {					\
370169689Skan    _FP_PACK_SEMIRAW(E,2,X);		\
371171825Skan    FP_PACK_RAW_E(val,X);		\
372169689Skan  } while (0)
373169689Skan
374169689Skan#define FP_PACK_SEMIRAW_EP(val,X)	\
375169689Skan  do {					\
376169689Skan    _FP_PACK_SEMIRAW(E,2,X);		\
377171825Skan    FP_PACK_RAW_EP(val,X);		\
378169689Skan  } while (0)
379169689Skan
380169689Skan#define FP_ISSIGNAN_E(X)	_FP_ISSIGNAN(E,2,X)
381169689Skan#define FP_NEG_E(R,X)		_FP_NEG(E,2,R,X)
382169689Skan#define FP_ADD_E(R,X,Y)		_FP_ADD(E,2,R,X,Y)
383169689Skan#define FP_SUB_E(R,X,Y)		_FP_SUB(E,2,R,X,Y)
384169689Skan#define FP_MUL_E(R,X,Y)		_FP_MUL(E,2,R,X,Y)
385169689Skan#define FP_DIV_E(R,X,Y)		_FP_DIV(E,2,R,X,Y)
386169689Skan#define FP_SQRT_E(R,X)		_FP_SQRT(E,2,R,X)
387169689Skan
388169689Skan/*
389169689Skan * Square root algorithms:
390169689Skan * We have just one right now, maybe Newton approximation
391169689Skan * should be added for those machines where division is fast.
392169689Skan * We optimize it by doing most of the calculations
393169689Skan * in one UWtype registers instead of two, although we don't
394169689Skan * have to.
395169689Skan */
396169689Skan#define _FP_SQRT_MEAT_E(R, S, T, X, q)			\
397169689Skan  do {							\
398169689Skan    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
399169689Skan    _FP_FRAC_SRL_2(X, (_FP_WORKBITS));			\
400169689Skan    while (q)						\
401169689Skan      {							\
402169689Skan        T##_f0 = S##_f0 + q;				\
403169689Skan        if (T##_f0 <= X##_f0)				\
404169689Skan          {						\
405169689Skan            S##_f0 = T##_f0 + q;			\
406169689Skan            X##_f0 -= T##_f0;				\
407169689Skan            R##_f0 += q;				\
408169689Skan          }						\
409169689Skan        _FP_FRAC_SLL_1(X, 1);				\
410169689Skan        q >>= 1;					\
411169689Skan      }							\
412169689Skan    _FP_FRAC_SLL_2(R, (_FP_WORKBITS));			\
413169689Skan    if (X##_f0)						\
414169689Skan      {							\
415169689Skan	if (S##_f0 < X##_f0)				\
416169689Skan	  R##_f0 |= _FP_WORK_ROUND;			\
417169689Skan	R##_f0 |= _FP_WORK_STICKY;			\
418169689Skan      }							\
419169689Skan  } while (0)
420169689Skan
421169689Skan#define FP_CMP_E(r,X,Y,un)	_FP_CMP(E,2,r,X,Y,un)
422169689Skan#define FP_CMP_EQ_E(r,X,Y)	_FP_CMP_EQ(E,2,r,X,Y)
423169689Skan#define FP_CMP_UNORD_E(r,X,Y)	_FP_CMP_UNORD(E,2,r,X,Y)
424169689Skan
425169689Skan#define FP_TO_INT_E(r,X,rsz,rsg)	_FP_TO_INT(E,2,r,X,rsz,rsg)
426169689Skan#define FP_FROM_INT_E(X,r,rs,rt)	_FP_FROM_INT(E,2,X,r,rs,rt)
427169689Skan
428169689Skan#define _FP_FRAC_HIGH_E(X)	(X##_f1)
429169689Skan#define _FP_FRAC_HIGH_RAW_E(X)	(X##_f0)
430169689Skan
431169689Skan#endif /* not _FP_W_TYPE_SIZE < 64 */
432