1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * IEEE754 floating point arithmetic
4 * single precision: MADDF.f (Fused Multiply Add)
5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6 *
7 * MIPS floating point support
8 * Copyright (C) 2015 Imagination Technologies, Ltd.
9 * Author: Markos Chandras <markos.chandras@imgtec.com>
10 */
11
12#include "ieee754sp.h"
13
14
15static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
16				 union ieee754sp y, enum maddf_flags flags)
17{
18	int re;
19	int rs;
20	unsigned int rm;
21	u64 rm64;
22	u64 zm64;
23	int s;
24
25	COMPXSP;
26	COMPYSP;
27	COMPZSP;
28
29	EXPLODEXSP;
30	EXPLODEYSP;
31	EXPLODEZSP;
32
33	FLUSHXSP;
34	FLUSHYSP;
35	FLUSHZSP;
36
37	ieee754_clearcx();
38
39	rs = xs ^ ys;
40	if (flags & MADDF_NEGATE_PRODUCT)
41		rs ^= 1;
42	if (flags & MADDF_NEGATE_ADDITION)
43		zs ^= 1;
44
45	/*
46	 * Handle the cases when at least one of x, y or z is a NaN.
47	 * Order of precedence is sNaN, qNaN and z, x, y.
48	 */
49	if (zc == IEEE754_CLASS_SNAN)
50		return ieee754sp_nanxcpt(z);
51	if (xc == IEEE754_CLASS_SNAN)
52		return ieee754sp_nanxcpt(x);
53	if (yc == IEEE754_CLASS_SNAN)
54		return ieee754sp_nanxcpt(y);
55	if (zc == IEEE754_CLASS_QNAN)
56		return z;
57	if (xc == IEEE754_CLASS_QNAN)
58		return x;
59	if (yc == IEEE754_CLASS_QNAN)
60		return y;
61
62	if (zc == IEEE754_CLASS_DNORM)
63		SPDNORMZ;
64	/* ZERO z cases are handled separately below */
65
66	switch (CLPAIR(xc, yc)) {
67
68
69	/*
70	 * Infinity handling
71	 */
72	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
73	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
74		ieee754_setcx(IEEE754_INVALID_OPERATION);
75		return ieee754sp_indef();
76
77	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
78	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
79	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
80	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
81	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
82		if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
83			/*
84			 * Cases of addition of infinities with opposite signs
85			 * or subtraction of infinities with same signs.
86			 */
87			ieee754_setcx(IEEE754_INVALID_OPERATION);
88			return ieee754sp_indef();
89		}
90		/*
91		 * z is here either not an infinity, or an infinity having the
92		 * same sign as product (x*y). The result must be an infinity,
93		 * and its sign is determined only by the sign of product (x*y).
94		 */
95		return ieee754sp_inf(rs);
96
97	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
98	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
99	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
100	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
101	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
102		if (zc == IEEE754_CLASS_INF)
103			return ieee754sp_inf(zs);
104		if (zc == IEEE754_CLASS_ZERO) {
105			/* Handle cases +0 + (-0) and similar ones. */
106			if (zs == rs)
107				/*
108				 * Cases of addition of zeros of equal signs
109				 * or subtraction of zeroes of opposite signs.
110				 * The sign of the resulting zero is in any
111				 * such case determined only by the sign of z.
112				 */
113				return z;
114
115			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
116		}
117		/* x*y is here 0, and z is not 0, so just return z */
118		return z;
119
120	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
121		SPDNORMX;
122		fallthrough;
123	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
124		if (zc == IEEE754_CLASS_INF)
125			return ieee754sp_inf(zs);
126		SPDNORMY;
127		break;
128
129	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130		if (zc == IEEE754_CLASS_INF)
131			return ieee754sp_inf(zs);
132		SPDNORMX;
133		break;
134
135	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
136		if (zc == IEEE754_CLASS_INF)
137			return ieee754sp_inf(zs);
138		/* continue to real computations */
139	}
140
141	/* Finally get to do some computation */
142
143	/*
144	 * Do the multiplication bit first
145	 *
146	 * rm = xm * ym, re = xe + ye basically
147	 *
148	 * At this point xm and ym should have been normalized.
149	 */
150
151	/* rm = xm * ym, re = xe+ye basically */
152	assert(xm & SP_HIDDEN_BIT);
153	assert(ym & SP_HIDDEN_BIT);
154
155	re = xe + ye;
156
157	/* Multiple 24 bit xm and ym to give 48 bit results */
158	rm64 = (uint64_t)xm * ym;
159
160	/* Shunt to top of word */
161	rm64 = rm64 << 16;
162
163	/* Put explicit bit at bit 62 if necessary */
164	if ((int64_t) rm64 < 0) {
165		rm64 = rm64 >> 1;
166		re++;
167	}
168
169	assert(rm64 & (1 << 62));
170
171	if (zc == IEEE754_CLASS_ZERO) {
172		/*
173		 * Move explicit bit from bit 62 to bit 26 since the
174		 * ieee754sp_format code expects the mantissa to be
175		 * 27 bits wide (24 + 3 rounding bits).
176		 */
177		rm = XSPSRS64(rm64, (62 - 26));
178		return ieee754sp_format(rs, re, rm);
179	}
180
181	/* Move explicit bit from bit 23 to bit 62 */
182	zm64 = (uint64_t)zm << (62 - 23);
183	assert(zm64 & (1 << 62));
184
185	/* Make the exponents the same */
186	if (ze > re) {
187		/*
188		 * Have to shift r fraction right to align.
189		 */
190		s = ze - re;
191		rm64 = XSPSRS64(rm64, s);
192		re += s;
193	} else if (re > ze) {
194		/*
195		 * Have to shift z fraction right to align.
196		 */
197		s = re - ze;
198		zm64 = XSPSRS64(zm64, s);
199		ze += s;
200	}
201	assert(ze == re);
202	assert(ze <= SP_EMAX);
203
204	/* Do the addition */
205	if (zs == rs) {
206		/*
207		 * Generate 64 bit result by adding two 63 bit numbers
208		 * leaving result in zm64, zs and ze.
209		 */
210		zm64 = zm64 + rm64;
211		if ((int64_t)zm64 < 0) {	/* carry out */
212			zm64 = XSPSRS1(zm64);
213			ze++;
214		}
215	} else {
216		if (zm64 >= rm64) {
217			zm64 = zm64 - rm64;
218		} else {
219			zm64 = rm64 - zm64;
220			zs = rs;
221		}
222		if (zm64 == 0)
223			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
224
225		/*
226		 * Put explicit bit at bit 62 if necessary.
227		 */
228		while ((zm64 >> 62) == 0) {
229			zm64 <<= 1;
230			ze--;
231		}
232	}
233
234	/*
235	 * Move explicit bit from bit 62 to bit 26 since the
236	 * ieee754sp_format code expects the mantissa to be
237	 * 27 bits wide (24 + 3 rounding bits).
238	 */
239	zm = XSPSRS64(zm64, (62 - 26));
240
241	return ieee754sp_format(zs, ze, zm);
242}
243
244union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
245				union ieee754sp y)
246{
247	return _sp_maddf(z, x, y, 0);
248}
249
250union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
251				union ieee754sp y)
252{
253	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
254}
255
256union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
257				union ieee754sp y)
258{
259	return _sp_maddf(z, x, y, 0);
260}
261
262union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
263				union ieee754sp y)
264{
265	return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
266}
267
268union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
269				union ieee754sp y)
270{
271	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
272}
273
274union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
275				union ieee754sp y)
276{
277	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
278}
279