m88k/m88k/m88110_fp.c

/*	$OpenBSD: m88110_fp.c,v 1.14 2023/01/31 15:18:54 deraadt Exp $	*/

/*
 * Copyright (c) 2007, Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice, this permission notice, and the disclaimer below
 * appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/systm.h>

#include <machine/fpu.h>
#include <machine/frame.h>
#include <machine/ieeefp.h>
#include <machine/trap.h>
#include <machine/m88110.h>

#include <lib/libkern/softfloat.h>

#include <m88k/m88k/fpu.h>

/*
 * Prototypes for the helpers defined later in this file:
 * m88110_fpu_emulate() returns a signal number (0 when no signal is to be
 * sent), m88110_fpu_fetch() loads a register operand into a fparg.
 */
int	m88110_fpu_emulate(struct trapframe *, u_int32_t);
void	m88110_fpu_fetch(struct trapframe *, u_int, u_int, u_int, fparg *);

/*
 * All 88110 floating-point exceptions are handled here.
 *
 * We can unfortunately not trust the floating-point exception cause
 * register, as the 88110 will conveniently only set the ``unimplemented
 * instruction'' bit, more often than not.
 *
 * So we ignore it completely, and try to emulate the faulting instruction.
 * The instruction can be:
 *
 * - an invalid SFU1 opcode, in which case we'll send SIGILL to the process.
 *
 * - a genuinely unimplemented feature: fsqrt.
 *
 * - an opcode involving an odd-numbered register pair (as a double precision
 *   operand). Rather than issuing a correctly formed flavour in kernel mode,
 *   and having to handle a possible nested exception, we emulate it. This
 *   will of course be slower, but we have to draw the line somewhere.
 *   Gcc will however never produce such code, so we don't have to worry
 *   too much about this under OpenBSD.
 *
 * Note that, currently, opcodes involving the extended register file (XRF)
 * are handled as invalid opcodes. This will eventually change once the
 * toolchain can correctly assemble XRF instructions, and the XRF is saved
 * across context switches (or not... lazy switching for XRF makes more
 * sense).
 *
 * m88110_fpu_exception() takes the trapframe of the faulting context,
 * skips the faulting instruction, saves the FPU control registers into
 * the frame, and then either emulates the instruction through
 * m88110_fpu_emulate() or posts a signal to curproc with trapsignal().
 * It returns nothing; all results are left in the trapframe.
 */

void
m88110_fpu_exception(struct trapframe *frame)
{
	struct proc *p = curproc;
	int fault_type;
	vaddr_t fault_addr;
	union sigval sv;
	u_int32_t insn;
	int sig;

	/*
	 * Remember the address of the faulting instruction before the
	 * frame is advanced; it is both where the opcode is fetched from
	 * and the address reported with any signal.
	 */
	fault_addr = frame->tf_exip & XIP_ADDR;

	/*
	 * Skip the instruction now. Signals will blame the correct
	 * address, and this has to be done before trapsignal() is
	 * invoked, or we won't run the first instruction of the signal
	 * handler...
	 */
	m88110_skip_insn(frame);

	/*
	 * The low-level exception code did not save the floating point
	 * exception registers. Do it now, and reset the exception
	 * cause register.
	 *
	 * fcr0 is saved as tf_fpecr, fcr62 as tf_fpsr and fcr63 as tf_fpcr;
	 * the final fstcr writes r0 to fcr0.
	 */
	__asm__ volatile ("fldcr %0, %%fcr0" : "=r"(frame->tf_fpecr));
	__asm__ volatile ("fldcr %0, %%fcr62" : "=r"(frame->tf_fpsr));
	__asm__ volatile ("fldcr %0, %%fcr63" : "=r"(frame->tf_fpcr));
	__asm__ volatile ("fstcr %r0, %fcr0");

	/*
	 * Fetch the faulting instruction. This should not fail, if it
	 * does, it's probably not your lucky day.
	 */
	if (copyinsn(p, (u_int32_t *)fault_addr, (u_int32_t *)&insn) != 0) {
		sig = SIGBUS;
		fault_type = BUS_OBJERR;
		goto deliver;
	}

	/* Dispatch on the top six bits of the instruction word. */
	switch (insn >> 26) {
	case 0x20:
		/*
		 * f{ld,st,x}cr instruction. If it caused a fault in
		 * user mode, this is a privilege violation.
		 */
		sig = SIGILL;
		fault_type = ILL_PRVREG;
		goto deliver;
	case 0x21:
		/*
		 * ``real'' FPU instruction. We'll try to emulate it,
		 * unless FPU is disabled.
		 */
		if (frame->tf_epsr & PSR_SFD1) {	/* don't bother */
			sig = SIGFPE;
			fault_type = FPE_FLTINV;
			goto deliver;
		}
		/*
		 * sig is 0, SIGILL or SIGFPE here. fault_type starts out as
		 * SI_NOINFO and is refined below if a signal is to be sent.
		 */
		sig = m88110_fpu_emulate(frame, insn);
		fault_type = SI_NOINFO;
		/*
		 * Update the floating point status register regardless of
		 * whether we'll deliver a signal or not.
		 */
		__asm__ volatile ("fstcr %0, %%fcr62" :: "r"(frame->tf_fpsr));
		break;
	default:
		/*
		 * Not a FPU instruction. Should not have raised this
		 * exception, so bail out.
		 */
		sig = SIGILL;
		fault_type = ILL_ILLOPC;
		goto deliver;
	}

	/*
	 * Only the emulation path reaches this point. The ``deliver'' label
	 * sits inside the conditional body so that the early-exit paths
	 * above, which always have a signal to send, share the same
	 * trapsignal() call.
	 */
	if (sig != 0) {
		if (sig == SIGILL)
			fault_type = ILL_ILLOPC;
		else {
			/*
			 * Pick a signal code from the first matching cause
			 * bit saved in tf_fpecr. For underflow and overflow,
			 * the code further depends on the bits set in
			 * tf_fpsr; if none matches, fault_type stays
			 * SI_NOINFO.
			 *
			 * NOTE(review): FPECR_FDVZ is reported as FPE_INTDIV
			 * rather than FPE_FLTDIV -- confirm this is intended.
			 */
			if (frame->tf_fpecr & FPECR_FIOV)
				fault_type = FPE_FLTSUB;
			else if (frame->tf_fpecr & FPECR_FROP)
				fault_type = FPE_FLTINV;
			else if (frame->tf_fpecr & FPECR_FDVZ)
				fault_type = FPE_INTDIV;
			else if (frame->tf_fpecr & FPECR_FUNF) {
				if (frame->tf_fpsr & FPSR_EFUNF)
					fault_type = FPE_FLTUND;
				else if (frame->tf_fpsr & FPSR_EFINX)
					fault_type = FPE_FLTRES;
			} else if (frame->tf_fpecr & FPECR_FOVF) {
				if (frame->tf_fpsr & FPSR_EFOVF)
					fault_type = FPE_FLTOVF;
				else if (frame->tf_fpsr & FPSR_EFINX)
					fault_type = FPE_FLTRES;
			} else if (frame->tf_fpecr & FPECR_FINX)
				fault_type = FPE_FLTRES;
		}

deliver:
		/* Report the address of the faulting instruction. */
		sv.sival_ptr = (void *)fault_addr;
		trapsignal(p, sig, 0, fault_type, sv);
	}
}

/*
 * Read a source operand from the general registers saved in the trapframe
 * and place it in *dest, widened to the computation format if necessary.
 *
 * frame	trapframe holding the saved registers (tf_r[]).
 * regno	number of the (first) register holding the operand.
 * orig_width	format of the operand as found in the register(s).
 * width	format the operand must be delivered in.
 * dest		where the fetched value is stored.
 *
 * Callers must make sure width is not FTYPE_INT and that orig_width is
 * not wider than width.
 *
 * Register 0 is never read from the frame: zero is used instead. A double
 * precision operand takes its high word from regno and its low word from
 * regno + 1; when regno is 31 the low word is zero rather than an access
 * past tf_r[31]. No conversion is applied to a double precision operand.
 */
void
m88110_fpu_fetch(struct trapframe *frame, u_int regno, u_int orig_width,
    u_int width, fparg *dest)
{
	u_int32_t word;

	if (orig_width == FTYPE_DBL) {
		/* high word first, then the low word from the next register */
		word = (regno != 0) ? frame->tf_r[regno] : 0;
		dest->dbl = ((float64)word) << 32;
		word = (regno != 31) ? frame->tf_r[regno + 1] : 0;
		dest->dbl |= (float64)word;
		return;
	}

	/* any other format than integer or single is left alone */
	if (orig_width != FTYPE_INT && orig_width != FTYPE_SNG)
		return;

	word = (regno != 0) ? frame->tf_r[regno] : 0;

	if (orig_width == FTYPE_INT) {
		/* integer operand: convert to the requested format */
		if (width == FTYPE_SNG)
			dest->sng = int32_to_float32(word);
		else if (width == FTYPE_DBL)
			dest->dbl = int32_to_float64(word);
	} else {
		/* single precision operand: copy as is, or widen */
		if (width == FTYPE_SNG)
			dest->sng = word;
		else if (width == FTYPE_DBL)
			dest->dbl = float32_to_float64(word);
	}
}

/*
 * Emulate one FPU instruction in software.
 *
 * frame	trapframe of the faulting context; source operands are read
 *		from it and results are written back to it.
 * insn		the instruction word to emulate.
 *
 * Returns SIGILL if the encoding is not supported (in which case the
 * frame is left untouched), SIGFPE if the emulation raised a status bit
 * which is also set in the control register value found on entry, and 0
 * otherwise.
 *
 * Unless SIGILL is returned, tf_fpsr holds on return its previous contents
 * plus any bit raised by the emulation, and tf_fpcr is restored to the
 * value it had on entry.
 */
int
m88110_fpu_emulate(struct trapframe *frame, u_int32_t insn)
{
	u_int opcode, rf, rd, rs1, rs2, t1, t2, td, prec;
	u_int32_t saved_fpsr, saved_fpcr;
	int t1_is_fp, t2_is_fp, td_is_fp;
	int newsig;
	fparg src1, src2, res;

	/*
	 * Split the instruction word into its fields.
	 */
	td = (insn >> 5) & 0x03;
	t2 = (insn >> 7) & 0x03;
	t1 = (insn >> 9) & 0x03;
	opcode = (insn >> 11) & 0x0f;
	rf = (insn >> 15) & 0x01;
	rs2 = insn & 0x1f;
	rs1 = (insn >> 16) & 0x1f;
	rd = (insn >> 21) & 0x1f;

	/*
	 * Instructions working on the extended register file are not
	 * supported yet.
	 */
	if (rf != 0)
		return (SIGILL);

	/*
	 * Check the operand type fields against what each opcode accepts;
	 * anything unexpected is an illegal instruction.
	 */
	t1_is_fp = (t1 == FTYPE_SNG || t1 == FTYPE_DBL);
	t2_is_fp = (t2 == FTYPE_SNG || t2 == FTYPE_DBL);
	td_is_fp = (td == FTYPE_SNG || td == FTYPE_DBL);

	switch (opcode) {
	case 0x00:	/* fmul */
	case 0x05:	/* fadd */
	case 0x06:	/* fsub */
	case 0x0e:	/* fdiv */
		if (!t1_is_fp || !t2_is_fp || !td_is_fp)
			return (SIGILL);
		break;
	case 0x04:	/* flt */
		/* a non-zero t1 would be flt on XRF */
		if (t1 != 0x00 || t2 != 0x00 || rs1 != 0 || !td_is_fp)
			return (SIGILL);
		break;
	case 0x07:	/* fcmp, fcmpu */
		if (!t1_is_fp || !t2_is_fp)
			return (SIGILL);
		/* td selects fcmp (0) or fcmpu (1) */
		if (td > 0x01)
			return (SIGILL);
		break;
	case 0x09:	/* int */
	case 0x0a:	/* nint */
	case 0x0b:	/* trnc */
		if (!t2_is_fp || t1 != 0x00 || td != 0x00 || rs1 != 0)
			return (SIGILL);
		break;
	case 0x01:	/* fcvt */
		/* converting to the same format is not a valid encoding */
		if (t2 == td)
			return (SIGILL);
		/* FALLTHROUGH */
	case 0x0f:	/* fsqrt */
		if (!t2_is_fp || !td_is_fp || t1 != 0x00 || rs1 != 0)
			return (SIGILL);
		break;
	case 0x08:	/* mov */
	default:
		return (SIGILL);
	}

	/*
	 * Start from a clean status register, so that the bits found in it
	 * after the computation are those raised by this instruction only.
	 * The control register is saved as well, since nint and trnc
	 * override the rounding mode for the duration of the emulation.
	 */
	saved_fpsr = frame->tf_fpsr;
	saved_fpcr = frame->tf_fpcr;
	frame->tf_fpsr = 0;

	/*
	 * Computations are done in the widest format among the operands
	 * involved: sources are widened to it when fetched, and the result
	 * is narrowed to the destination format, if needed, when stored.
	 */
	switch (opcode) {
	case 0x00:	/* fmul */
	case 0x05:	/* fadd */
	case 0x06:	/* fsub */
	case 0x0e:	/* fdiv */
		prec = fpu_precision(t1, t2, td);
		m88110_fpu_fetch(frame, rs1, t1, prec, &src1);
		m88110_fpu_fetch(frame, rs2, t2, prec, &src2);
		if (prec == FTYPE_SNG) {
			switch (opcode) {
			case 0x00:
				res.sng = float32_mul(src1.sng, src2.sng);
				break;
			case 0x05:
				res.sng = float32_add(src1.sng, src2.sng);
				break;
			case 0x06:
				res.sng = float32_sub(src1.sng, src2.sng);
				break;
			case 0x0e:
				res.sng = float32_div(src1.sng, src2.sng);
				break;
			}
		} else if (prec == FTYPE_DBL) {
			switch (opcode) {
			case 0x00:
				res.dbl = float64_mul(src1.dbl, src2.dbl);
				break;
			case 0x05:
				res.dbl = float64_add(src1.dbl, src2.dbl);
				break;
			case 0x06:
				res.dbl = float64_sub(src1.dbl, src2.dbl);
				break;
			case 0x0e:
				res.dbl = float64_div(src1.dbl, src2.dbl);
				break;
			}
		}
		fpu_store(frame, rd, prec, td, &res);
		break;

	case 0x01:	/* fcvt */
		/* the conversion happens as part of the fetch or the store */
		prec = fpu_precision(IGNORE_PRECISION, t2, td);
		m88110_fpu_fetch(frame, rs2, t2, prec, &res);
		fpu_store(frame, rd, prec, td, &res);
		break;

	case 0x04:	/* flt */
		m88110_fpu_fetch(frame, rs2, FTYPE_INT, td, &res);
		fpu_store(frame, rd, td, td, &res);
		break;

	case 0x07:	/* fcmp, fcmpu */
		prec = fpu_precision(t1, t2, IGNORE_PRECISION);
		m88110_fpu_fetch(frame, rs1, t1, prec, &src1);
		m88110_fpu_fetch(frame, rs2, t2, prec, &src2);
		fpu_compare(frame, &src1, &src2, prec, rd, td /* fcmpu */);
		break;

	case 0x09:	/* int */
	case 0x0a:	/* nint */
	case 0x0b:	/* trnc */
		/*
		 * int converts with the rounding mode currently in tf_fpcr;
		 * nint and trnc force a specific one first.
		 */
		if (opcode == 0x0a) {
			/* round to nearest */
			frame->tf_fpcr =
			    (saved_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) |
			    (FP_RN << FPCR_RD_SHIFT);
		} else if (opcode == 0x0b) {
			/* round towards zero */
			frame->tf_fpcr =
			    (saved_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) |
			    (FP_RZ << FPCR_RD_SHIFT);
		}
		m88110_fpu_fetch(frame, rs2, t2, t2, &res);
		fpu_store(frame, rd, t2, FTYPE_INT, &res);
		break;

	case 0x0f:	/* sqrt */
		prec = fpu_precision(IGNORE_PRECISION, t2, td);
		m88110_fpu_fetch(frame, rs2, t2, prec, &src1);
		if (prec == FTYPE_SNG)
			res.sng = float32_sqrt(src1.sng);
		else if (prec == FTYPE_DBL)
			res.dbl = float64_sqrt(src1.dbl);
		fpu_store(frame, rd, prec, td, &res);
		break;
	}

	/*
	 * A signal is needed if any of the status bits raised by this
	 * instruction is also set in the control register found on entry.
	 * Then merge the new status bits with the previous ones.
	 */
	newsig = (frame->tf_fpsr & saved_fpcr) ? SIGFPE : 0;
	frame->tf_fpsr |= saved_fpsr;

	/*
	 * Undo any temporary rounding mode change.
	 */
	frame->tf_fpcr = saved_fpcr;

	return (newsig);
}