1/*	$OpenBSD: m88110_fp.c,v 1.14 2023/01/31 15:18:54 deraadt Exp $	*/
2
3/*
4 * Copyright (c) 2007, Miodrag Vallat.
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice, this permission notice, and the disclaimer below
9 * appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20#include <sys/param.h>
21#include <sys/proc.h>
22#include <sys/signalvar.h>
23#include <sys/systm.h>
24
25#include <machine/fpu.h>
26#include <machine/frame.h>
27#include <machine/ieeefp.h>
28#include <machine/trap.h>
29#include <machine/m88110.h>
30
31#include <lib/libkern/softfloat.h>
32
33#include <m88k/m88k/fpu.h>
34
35int	m88110_fpu_emulate(struct trapframe *, u_int32_t);
36void	m88110_fpu_fetch(struct trapframe *, u_int, u_int, u_int, fparg *);
37
38/*
39 * All 88110 floating-point exceptions are handled there.
40 *
41 * We can unfortunately not trust the floating-point exception cause
42 * register, as the 88110 will conveniently only set the ``unimplemented
43 * instruction'' bit, more often than not.
44 *
45 * So we ignore it completely, and try to emulate the faulting instruction.
46 * The instruction can be:
47 *
48 * - an invalid SFU1 opcode, in which case we'll send SIGILL to the process.
49 *
50 * - a genuinely unimplemented feature: fsqrt.
51 *
52 * - an opcode involving an odd-numbered register pair (as a double precision
53 *   operand). Rather than issuing a correctly formed flavour in kernel mode,
54 *   and having to handle a possible nested exception, we emulate it. This
55 *   will of course be slower, but we have to draw the line somewhere.
56 *   Gcc will however never produce such code, so we don't have to worry
57 *   too much about this under OpenBSD.
58 *
59 * Note that, currently, opcodes involving the extended register file (XRF)
60 * are handled as invalid opcodes. This will eventually change once the
61 * toolchain can correctly assemble XRF instructions, and the XRF is saved
62 * across context switches (or not... lazy switching for XRF makes more
63 * sense).
64 */
65
66void
67m88110_fpu_exception(struct trapframe *frame)
68{
69	struct proc *p = curproc;
70	int fault_type;
71	vaddr_t fault_addr;
72	union sigval sv;
73	u_int32_t insn;
74	int sig;
75
76	fault_addr = frame->tf_exip & XIP_ADDR;
77
78	/*
79	 * Skip the instruction now. Signals will blame the correct
80	 * address, and this has to be done before trapsignal() is
81	 * invoked, or we won't run the first instruction of the signal
82	 * handler...
83	 */
84	m88110_skip_insn(frame);
85
86	/*
87	 * The low-level exception code did not save the floating point
88	 * exception registers. Do it now, and reset the exception
89	 * cause register.
90	 */
91	__asm__ volatile ("fldcr %0, %%fcr0" : "=r"(frame->tf_fpecr));
92	__asm__ volatile ("fldcr %0, %%fcr62" : "=r"(frame->tf_fpsr));
93	__asm__ volatile ("fldcr %0, %%fcr63" : "=r"(frame->tf_fpcr));
94	__asm__ volatile ("fstcr %r0, %fcr0");
95
96	/*
97	 * Fetch the faulting instruction. This should not fail, if it
98	 * does, it's probably not your lucky day.
99	 */
100	if (copyinsn(p, (u_int32_t *)fault_addr, (u_int32_t *)&insn) != 0) {
101		sig = SIGBUS;
102		fault_type = BUS_OBJERR;
103		goto deliver;
104	}
105
106	switch (insn >> 26) {
107	case 0x20:
108		/*
109		 * f{ld,st,x}cr instruction. If it caused a fault in
110		 * user mode, this is a privilege violation.
111		 */
112		sig = SIGILL;
113		fault_type = ILL_PRVREG;
114		goto deliver;
115	case 0x21:
116		/*
117		 * ``real'' FPU instruction. We'll try to emulate it,
118		 * unless FPU is disabled.
119		 */
120		if (frame->tf_epsr & PSR_SFD1) {	/* don't bother */
121			sig = SIGFPE;
122			fault_type = FPE_FLTINV;
123			goto deliver;
124		}
125		sig = m88110_fpu_emulate(frame, insn);
126		fault_type = SI_NOINFO;
127		/*
128		 * Update the floating point status register regardless of
129		 * whether we'll deliver a signal or not.
130		 */
131		__asm__ volatile ("fstcr %0, %%fcr62" :: "r"(frame->tf_fpsr));
132		break;
133	default:
134		/*
135		 * Not a FPU instruction. Should not have raised this
136		 * exception, so bail out.
137		 */
138		sig = SIGILL;
139		fault_type = ILL_ILLOPC;
140		goto deliver;
141	}
142
143	if (sig != 0) {
144		if (sig == SIGILL)
145			fault_type = ILL_ILLOPC;
146		else {
147			if (frame->tf_fpecr & FPECR_FIOV)
148				fault_type = FPE_FLTSUB;
149			else if (frame->tf_fpecr & FPECR_FROP)
150				fault_type = FPE_FLTINV;
151			else if (frame->tf_fpecr & FPECR_FDVZ)
152				fault_type = FPE_INTDIV;
153			else if (frame->tf_fpecr & FPECR_FUNF) {
154				if (frame->tf_fpsr & FPSR_EFUNF)
155					fault_type = FPE_FLTUND;
156				else if (frame->tf_fpsr & FPSR_EFINX)
157					fault_type = FPE_FLTRES;
158			} else if (frame->tf_fpecr & FPECR_FOVF) {
159				if (frame->tf_fpsr & FPSR_EFOVF)
160					fault_type = FPE_FLTOVF;
161				else if (frame->tf_fpsr & FPSR_EFINX)
162					fault_type = FPE_FLTRES;
163			} else if (frame->tf_fpecr & FPECR_FINX)
164				fault_type = FPE_FLTRES;
165		}
166
167deliver:
168		sv.sival_ptr = (void *)fault_addr;
169		trapsignal(p, sig, 0, fault_type, sv);
170	}
171}
172
173/*
174 * Load a floating-point argument into a fparg union, then convert it to
175 * the required format if it is of larger precision.
176 *
177 * This assumes the final format (width) is not FTYPE_INT, and the original
178 * format (orig_width) <= width.
179 */
180void
181m88110_fpu_fetch(struct trapframe *frame, u_int regno, u_int orig_width,
182    u_int width, fparg *dest)
183{
184	u_int32_t tmp;
185
186	switch (orig_width) {
187	case FTYPE_INT:
188		tmp = regno == 0 ? 0 : frame->tf_r[regno];
189		switch (width) {
190		case FTYPE_SNG:
191			dest->sng = int32_to_float32(tmp);
192			break;
193		case FTYPE_DBL:
194			dest->dbl = int32_to_float64(tmp);
195			break;
196		}
197		break;
198	case FTYPE_SNG:
199		tmp = regno == 0 ? 0 : frame->tf_r[regno];
200		switch (width) {
201		case FTYPE_SNG:
202			dest->sng = tmp;
203			break;
204		case FTYPE_DBL:
205			dest->dbl = float32_to_float64(tmp);
206			break;
207		}
208		break;
209	case FTYPE_DBL:
210		tmp = regno == 0 ? 0 : frame->tf_r[regno];
211		dest->dbl = ((float64)tmp) << 32;
212		tmp = regno == 31 ? 0 : frame->tf_r[regno + 1];
213		dest->dbl |= (float64)tmp;
214		break;
215	}
216}
217
218/*
219 * Emulate an FPU instruction.  On return, the trapframe registers
220 * will be modified to reflect the settings the hardware would have left.
221 */
222int
223m88110_fpu_emulate(struct trapframe *frame, u_int32_t insn)
224{
225	u_int rf, rd, rs1, rs2, t1, t2, td, tmax, opcode;
226	u_int32_t old_fpsr, old_fpcr;
227	int rc;
228
229	fparg arg1, arg2, dest;
230
231	/*
232	 * Crack the instruction.
233	 */
234	rd = (insn >> 21) & 0x1f;
235	rs1 = (insn >> 16) & 0x1f;
236	rs2 = insn & 0x1f;
237	rf = (insn >> 15) & 0x01;
238	opcode = (insn >> 11) & 0x0f;
239	t1 = (insn >> 9) & 0x03;
240	t2 = (insn >> 7) & 0x03;
241	td = (insn >> 5) & 0x03;
242
243	/*
244	 * Discard invalid opcodes, as well as instructions involving XRF,
245	 * since we do not support them yet.
246	 */
247	if (rf != 0)
248		return (SIGILL);
249
250	switch (opcode) {
251	case 0x00:	/* fmul */
252	case 0x05:	/* fadd */
253	case 0x06:	/* fsub */
254	case 0x0e:	/* fdiv */
255		if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) ||
256		    (t2 != FTYPE_SNG && t2 != FTYPE_DBL) ||
257		    (td != FTYPE_SNG && td != FTYPE_DBL))
258			return (SIGILL);
259		break;
260	case 0x04:	/* flt */
261		if (t1 != 0x00)	/* flt on XRF */
262			return (SIGILL);
263		if ((td != FTYPE_SNG && td != FTYPE_DBL) ||
264		    t2 != 0x00 || rs1 != 0)
265			return (SIGILL);
266		break;
267	case 0x07:	/* fcmp, fcmpu */
268		if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) ||
269		    (t2 != FTYPE_SNG && t2 != FTYPE_DBL))
270			return (SIGILL);
271		if (td != 0x00 /* fcmp */ && td != 0x01 /* fcmpu */)
272			return (SIGILL);
273		break;
274	case 0x09:	/* int */
275	case 0x0a:	/* nint */
276	case 0x0b:	/* trnc */
277		if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) ||
278		    t1 != 0x00 || td != 0x00 || rs1 != 0)
279			return (SIGILL);
280		break;
281	case 0x01:	/* fcvt */
282		if (t2 == td)
283			return (SIGILL);
284		/* FALLTHROUGH */
285	case 0x0f:	/* fsqrt */
286		if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) ||
287		    (td != FTYPE_SNG && td != FTYPE_DBL) ||
288		    t1 != 0x00 || rs1 != 0)
289			return (SIGILL);
290		break;
291	default:
292	case 0x08:	/* mov */
293		return (SIGILL);
294	}
295
296	/*
297	 * Temporarily reset the status register, so that we can tell
298	 * which exceptions are new after processing the opcode.
299	 */
300	old_fpsr = frame->tf_fpsr;
301	frame->tf_fpsr = 0;
302
303	/*
304	 * Save fpcr as well, since we might need to change rounding mode
305	 * temporarily.
306	 */
307	old_fpcr = frame->tf_fpcr;
308
309	/*
310	 * The logic for instruction emulation is:
311	 *
312	 * - the computation precision is the largest one of all the operands.
313	 * - all source operands are converted to this precision if needed.
314	 * - computation is performed.
315	 * - the result is stored into the destination operand, converting it
316	 *   to the destination precision if lower.
317	 */
318
319	switch (opcode) {
320	case 0x00:	/* fmul */
321		tmax = fpu_precision(t1, t2, td);
322		m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1);
323		m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2);
324		switch (tmax) {
325		case FTYPE_SNG:
326			dest.sng = float32_mul(arg1.sng, arg2.sng);
327			break;
328		case FTYPE_DBL:
329			dest.dbl = float64_mul(arg1.dbl, arg2.dbl);
330			break;
331		}
332		fpu_store(frame, rd, tmax, td, &dest);
333		break;
334
335	case 0x01:	/* fcvt */
336		tmax = fpu_precision(IGNORE_PRECISION, t2, td);
337		m88110_fpu_fetch(frame, rs2, t2, tmax, &dest);
338		fpu_store(frame, rd, tmax, td, &dest);
339		break;
340
341	case 0x04:	/* flt */
342		m88110_fpu_fetch(frame, rs2, FTYPE_INT, td, &dest);
343		fpu_store(frame, rd, td, td, &dest);
344		break;
345
346	case 0x05:	/* fadd */
347		tmax = fpu_precision(t1, t2, td);
348		m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1);
349		m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2);
350		switch (tmax) {
351		case FTYPE_SNG:
352			dest.sng = float32_add(arg1.sng, arg2.sng);
353			break;
354		case FTYPE_DBL:
355			dest.dbl = float64_add(arg1.dbl, arg2.dbl);
356			break;
357		}
358		fpu_store(frame, rd, tmax, td, &dest);
359		break;
360
361	case 0x06:	/* fsub */
362		tmax = fpu_precision(t1, t2, td);
363		m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1);
364		m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2);
365		switch (tmax) {
366		case FTYPE_SNG:
367			dest.sng = float32_sub(arg1.sng, arg2.sng);
368			break;
369		case FTYPE_DBL:
370			dest.dbl = float64_sub(arg1.dbl, arg2.dbl);
371			break;
372		}
373		fpu_store(frame, rd, tmax, td, &dest);
374		break;
375
376	case 0x07:	/* fcmp, fcmpu */
377		tmax = fpu_precision(t1, t2, IGNORE_PRECISION);
378		m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1);
379		m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2);
380		fpu_compare(frame, &arg1, &arg2, tmax, rd, td /* fcmpu */);
381		break;
382
383	case 0x09:	/* int */
384do_int:
385		m88110_fpu_fetch(frame, rs2, t2, t2, &dest);
386		fpu_store(frame, rd, t2, FTYPE_INT, &dest);
387		break;
388
389	case 0x0a:	/* nint */
390		/* round to nearest */
391		frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) |
392		    (FP_RN << FPCR_RD_SHIFT);
393		goto do_int;
394
395	case 0x0b:	/* trnc */
396		/* round towards zero */
397		frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) |
398		    (FP_RZ << FPCR_RD_SHIFT);
399		goto do_int;
400
401	case 0x0e:	/* fdiv */
402		tmax = fpu_precision(t1, t2, td);
403		m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1);
404		m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2);
405		switch (tmax) {
406		case FTYPE_SNG:
407			dest.sng = float32_div(arg1.sng, arg2.sng);
408			break;
409		case FTYPE_DBL:
410			dest.dbl = float64_div(arg1.dbl, arg2.dbl);
411			break;
412		}
413		fpu_store(frame, rd, tmax, td, &dest);
414		break;
415
416	case 0x0f:	/* sqrt */
417		tmax = fpu_precision(IGNORE_PRECISION, t2, td);
418		m88110_fpu_fetch(frame, rs2, t2, tmax, &arg1);
419		switch (tmax) {
420		case FTYPE_SNG:
421			dest.sng = float32_sqrt(arg1.sng);
422			break;
423		case FTYPE_DBL:
424			dest.dbl = float64_sqrt(arg1.dbl);
425			break;
426		}
427		fpu_store(frame, rd, tmax, td, &dest);
428		break;
429	}
430
431	/*
432	 * Mark new exceptions, if any, in the fpsr, and decide whether
433	 * to send a signal or not.
434	 */
435
436	if (frame->tf_fpsr & old_fpcr)
437		rc = SIGFPE;
438	else
439		rc = 0;
440	frame->tf_fpsr |= old_fpsr;
441
442	/*
443	 * Restore fpcr as well.
444	 */
445	frame->tf_fpcr = old_fpcr;
446
447	return (rc);
448}
449