1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Routines to emulate some Altivec/VMX instructions, specifically
4 * those that can trap when given denormalized operands in Java mode.
5 */
6#include <linux/kernel.h>
7#include <linux/errno.h>
8#include <linux/sched.h>
9#include <asm/ptrace.h>
10#include <asm/processor.h>
11#include <asm/switch_to.h>
12#include <linux/uaccess.h>
13#include <asm/inst.h>
14
15/* Functions in vector.S */
16extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
17extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
18extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
19extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
20extern void vrefp(vector128 *dst, vector128 *src);
21extern void vrsqrtefp(vector128 *dst, vector128 *src);
22extern void vexptep(vector128 *dst, vector128 *src);
23
/*
 * Mantissa seeds for eexp2(): entry i holds 2^(i/8) scaled by 2^23,
 * i.e. a 24-bit significand with the leading 1 at bit 23.  The table is
 * indexed by the top three fraction bits of the fixed-point exponent.
 * It is read-only, hence const.
 */
static const unsigned int exp2s[8] = {
	0x800000,	/* 2^(0/8) = 1.0 */
	0x8b95c2,	/* 2^(1/8) */
	0x9837f0,	/* 2^(2/8) */
	0xa5fed7,	/* 2^(3/8) */
	0xb504f3,	/* 2^(4/8) */
	0xc5672a,	/* 2^(5/8) */
	0xd744fd,	/* 2^(6/8) */
	0xeac0c7	/* 2^(7/8) */
};
34
35/*
36 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
37 * single-precision floating-point representation of x.
38 */
39static unsigned int eexp2(unsigned int s)
40{
41	int exp, pwr;
42	unsigned int mant, frac;
43
44	/* extract exponent field from input */
45	exp = ((s >> 23) & 0xff) - 127;
46	if (exp > 7) {
47		/* check for NaN input */
48		if (exp == 128 && (s & 0x7fffff) != 0)
49			return s | 0x400000;	/* return QNaN */
50		/* 2^-big = 0, 2^+big = +Inf */
51		return (s & 0x80000000)? 0: 0x7f800000;	/* 0 or +Inf */
52	}
53	if (exp < -23)
54		return 0x3f800000;	/* 1.0 */
55
56	/* convert to fixed point integer in 9.23 representation */
57	pwr = (s & 0x7fffff) | 0x800000;
58	if (exp > 0)
59		pwr <<= exp;
60	else
61		pwr >>= -exp;
62	if (s & 0x80000000)
63		pwr = -pwr;
64
65	/* extract integer part, which becomes exponent part of result */
66	exp = (pwr >> 23) + 126;
67	if (exp >= 254)
68		return 0x7f800000;
69	if (exp < -23)
70		return 0;
71
72	/* table lookup on top 3 bits of fraction to get mantissa */
73	mant = exp2s[(pwr >> 20) & 7];
74
75	/* linear interpolation using remaining 20 bits of fraction */
76	asm("mulhwu %0,%1,%2" : "=r" (frac)
77	    : "r" (pwr << 12), "r" (0x172b83ff));
78	asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
79	mant += frac;
80
81	if (exp >= 0)
82		return mant + (exp << 23);
83
84	/* denormalized result */
85	exp = -exp;
86	mant += 1 << (exp - 1);
87	return mant >> exp;
88}
89
/* High 32 bits of the unsigned 64-bit product a * b (PowerPC mulhwu). */
static inline unsigned int elog2_mulhi(unsigned int a, unsigned int b)
{
	return (unsigned int)(((unsigned long long)a * b) >> 32);
}

/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return value
 * is the single-precision representation of the estimate.
 *
 * Special cases, following the vlogefp definition:
 *	NaN		-> QNaN (sign and payload preserved)
 *	-Inf, x < 0	-> default QNaN (0x7fc00000)
 *	+0, -0		-> -Inf
 *	+Inf		-> +Inf
 */
static unsigned int elog2(unsigned int s)
{
	unsigned int mant, frac;
	int exp, lz;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		if (s & 0x80000000)
			return 0x7fc00000;	/* log2(-Inf) is invalid */
		return s;			/* log2(+Inf) = +Inf */
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)
		return 0x7fc00000;	/* log2 of a negative number is invalid */

	if (exp == 0) {
		/* denormalized: mant != 0 here, so the count is well defined */
		lz = __builtin_clz(mant);
		mant <<= lz - 8;		/* move leading 1 to bit 23 */
		/* multiply rather than shift: the value is negative */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * exp now holds the integer part of the result in signed 9.23 fixed
	 * point.  Reduce mant towards 1.0 in three steps, recording the
	 * 1/2, 1/4 and 1/8 fraction bits as they are peeled off.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		mant = elog2_mulhi(mant, 0xb504f334);	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		mant = elog2_mulhi(mant, 0xd744fccb);	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		mant = elog2_mulhi(mant, 0xeac0c6e8);	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = elog2_mulhi((mant - 0x800000) << 1, 0xb0c7cd3a);
		exp += (int)frac;
	}

	/* convert the signed 9.23 fixed-point value to single precision */
	s = exp & 0x80000000;
	if (exp != 0) {
		if (s)
			exp = -exp;
		lz = 8 - __builtin_clz((unsigned int)exp);
		/* normalize so that the leading 1 sits at bit 23 */
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* the leading 1 carries into the exponent field: 126 + 1 = bias */
		s += ((lz + 126) << 23) + exp;
	}
	return s;
}
154
/* Bit OR-ed into *vscrp by the conversions below when they saturate */
#define VSCR_SAT	1

/*
 * Convert the single-precision value with bit pattern `x', multiplied by
 * 2^scale, to a signed 32-bit integer, rounding towards zero.
 *
 * NaNs convert to 0.  Results outside the signed 32-bit range saturate
 * to 0x7fffffff or 0x80000000 and set VSCR_SAT in *vscrp, except when
 * the scaled value is exactly -2^31, which is representable.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const int negative = (x & 0x80000000) != 0;
	int biased = (x >> 23) & 0xff;
	int sig = x & 0x7fffff;
	int power;

	if (biased == 255 && sig != 0)
		return 0;		/* NaN -> 0 */

	power = biased - 127 + scale;
	if (power < 0)
		return 0;		/* magnitude below 1: truncates to 0 */

	if (power >= 31) {
		/* out of range, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading 1 at bit 30, then shift down to the units place */
	sig = ((sig | 0x800000) << 7) >> (30 - power);
	if (negative)
		return -sig;
	return sig;
}
178
179static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
180{
181	int exp;
182	unsigned int mant;
183
184	exp = (x >> 23) & 0xff;
185	mant = x & 0x7fffff;
186	if (exp == 255 && mant != 0)
187		return 0;		/* NaN -> 0 */
188	exp = exp - 127 + scale;
189	if (exp < 0)
190		return 0;		/* round towards zero */
191	if (x & 0x80000000) {
192		/* negative => saturate to 0 */
193		*vscrp |= VSCR_SAT;
194		return 0;
195	}
196	if (exp >= 32) {
197		/* saturate */
198		*vscrp |= VSCR_SAT;
199		return 0xffffffff;
200	}
201	mant |= 0x800000;
202	mant = (mant << 8) >> (31 - exp);
203	return mant;
204}
205
/*
 * Round to a floating-point integer, towards zero (truncate).
 * `x' and the return value are raw single-precision bit patterns.
 */
static unsigned int rfiz(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;
	const unsigned int fraction = x & 0x7fffff;

	if (e == 128 && fraction != 0)
		return x | 0x400000;	/* NaN: quieten it */
	if (e < 0)
		return x & 0x80000000;	/* |x| < 1.0: signed zero */
	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */

	/* clear the mantissa bits that lie below the binary point */
	return x & ~(0x7fffffu >> e);
}
220
/*
 * Round to a floating-point integer, away from zero (towards +/- Inf
 * according to the sign of x).  `x' and the return value are raw
 * single-precision bit patterns.
 */
static unsigned int rfii(unsigned int x)
{
	const int e = (int)((x >> 23) & 0xff) - 127;
	unsigned int below_point;

	if (e == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN: quieten it */

	/* already integral (or Inf), or a signed zero: nothing to do */
	if (e >= 23 || (x & 0x7fffffff) == 0)
		return x;

	if (e < 0)
		return (x & 0x80000000) | 0x3f800000;	/* 0 < |x| < 1 -> +/-1.0 */

	below_point = 0x7fffffu >> e;
	/*
	 * Bump the magnitude past the next integer unless it already is one,
	 * then drop the fraction.  A carry out of the mantissa lands in the
	 * exponent field, which is the desired result; it cannot reach the
	 * sign bit.
	 */
	return (x + below_point) & ~below_point;
}
241
/*
 * Round to a floating-point integer, to nearest, with ties going to the
 * even integer (IEEE round-to-nearest, the mode vrfin is defined to use).
 * `x' and the return value are raw single-precision bit patterns.
 */
static unsigned int rfin(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	unsigned int frac_mask, unit, half, frac, res;
	int exp;

	exp = (int)((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return sign;		/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: pick the even one */
		if ((x & 0x7fffff) == 0)
			return sign;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return sign | 0x3f800000;
	}

	frac_mask = 0x7fffffu >> exp;	/* mantissa bits below the binary point */
	unit = frac_mask + 1;		/* weight of 1.0 in this binade */
	half = unit >> 1;		/* weight of 0.5 */
	frac = x & frac_mask;
	res = x & ~frac_mask;		/* magnitude truncated to an integer */

	/*
	 * Round up when the fraction exceeds one half, or equals one half
	 * and the truncated integer is odd.  For exp == 0 the units bit is
	 * the implicit leading 1; `unit' then overlaps the low bit of the
	 * biased exponent 127, which is set, matching the odd integer 1.
	 * A carry out of the mantissa correctly increments the exponent.
	 */
	if (frac > half || (frac == half && (res & unit)))
		res += unit;
	return res;
}
261
262int emulate_altivec(struct pt_regs *regs)
263{
264	ppc_inst_t instr;
265	unsigned int i, word;
266	unsigned int va, vb, vc, vd;
267	vector128 *vrs;
268
269	if (get_user_instr(instr, (void __user *)regs->nip))
270		return -EFAULT;
271
272	word = ppc_inst_val(instr);
273	if (ppc_inst_primary_opcode(instr) != 4)
274		return -EINVAL;		/* not an altivec instruction */
275	vd = (word >> 21) & 0x1f;
276	va = (word >> 16) & 0x1f;
277	vb = (word >> 11) & 0x1f;
278	vc = (word >> 6) & 0x1f;
279
280	vrs = current->thread.vr_state.vr;
281	switch (word & 0x3f) {
282	case 10:
283		switch (vc) {
284		case 0:	/* vaddfp */
285			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
286			break;
287		case 1:	/* vsubfp */
288			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
289			break;
290		case 4:	/* vrefp */
291			vrefp(&vrs[vd], &vrs[vb]);
292			break;
293		case 5:	/* vrsqrtefp */
294			vrsqrtefp(&vrs[vd], &vrs[vb]);
295			break;
296		case 6:	/* vexptefp */
297			for (i = 0; i < 4; ++i)
298				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
299			break;
300		case 7:	/* vlogefp */
301			for (i = 0; i < 4; ++i)
302				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
303			break;
304		case 8:		/* vrfin */
305			for (i = 0; i < 4; ++i)
306				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
307			break;
308		case 9:		/* vrfiz */
309			for (i = 0; i < 4; ++i)
310				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
311			break;
312		case 10:	/* vrfip */
313			for (i = 0; i < 4; ++i) {
314				u32 x = vrs[vb].u[i];
315				x = (x & 0x80000000)? rfiz(x): rfii(x);
316				vrs[vd].u[i] = x;
317			}
318			break;
319		case 11:	/* vrfim */
320			for (i = 0; i < 4; ++i) {
321				u32 x = vrs[vb].u[i];
322				x = (x & 0x80000000)? rfii(x): rfiz(x);
323				vrs[vd].u[i] = x;
324			}
325			break;
326		case 14:	/* vctuxs */
327			for (i = 0; i < 4; ++i)
328				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
329					&current->thread.vr_state.vscr.u[3]);
330			break;
331		case 15:	/* vctsxs */
332			for (i = 0; i < 4; ++i)
333				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
334					&current->thread.vr_state.vscr.u[3]);
335			break;
336		default:
337			return -EINVAL;
338		}
339		break;
340	case 46:	/* vmaddfp */
341		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
342		break;
343	case 47:	/* vnmsubfp */
344		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
345		break;
346	default:
347		return -EINVAL;
348	}
349
350	return 0;
351}
352