1/*	$OpenBSD: fp_emulate.c,v 1.25 2023/01/11 03:19:52 visa Exp $	*/
2
3/*
4 * Copyright (c) 2010 Miodrag Vallat.
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19/*
20 * Floating Point completion/emulation code (MI softfloat code control engine).
21 *
22 * Supports all MIPS IV COP1 and COP1X floating-point instructions.
23 *
24 * Floating-point load and store instructions, as well as branch instructions,
25 * are only handled if the kernel is compiled with option FPUEMUL.
26 */
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/signalvar.h>
32
33#include <machine/cpu.h>
34#include <mips64/mips_cpu.h>
35#include <machine/fpu.h>
36#include <machine/frame.h>
37#include <machine/ieee.h>
38#include <machine/ieeefp.h>
39#include <machine/mips_opcode.h>
40#include <machine/regnum.h>
41
42#include <lib/libkern/softfloat.h>
43#if defined(DEBUG) && defined(DDB)
44#include <machine/db_machdep.h>
45#endif
46
47int	fpu_emulate(struct proc *, struct trapframe *, uint32_t,
48	    union sigval *);
49int	fpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t);
50int	fpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t);
51uint64_t
52	fpu_load(struct proc *, struct trapframe *, uint, uint);
53void	fpu_store(struct proc *, struct trapframe *, uint, uint, uint64_t);
54#ifdef FPUEMUL
55int	nofpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t,
56	    union sigval *);
57int	nofpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t,
58	    union sigval *);
59int	nofpu_emulate_loadstore(struct proc *, struct trapframe *, uint32_t,
60	    union sigval *);
61int	nofpu_emulate_movci(struct trapframe *, uint32_t);
62#endif
63
64typedef	int (fpu_fn3)(struct proc *, struct trapframe *, uint, uint, uint,
65	    uint);
66typedef	int (fpu_fn4)(struct proc *, struct trapframe *, uint, uint, uint,
67	    uint, uint);
68fpu_fn3	fpu_abs;
69fpu_fn3	fpu_add;
70int	fpu_c(struct proc *, struct trapframe *, uint, uint, uint, uint, uint);
71fpu_fn3	fpu_ceil_l;
72fpu_fn3	fpu_ceil_w;
73fpu_fn3	fpu_cvt_d;
74fpu_fn3	fpu_cvt_l;
75fpu_fn3	fpu_cvt_s;
76fpu_fn3	fpu_cvt_w;
77fpu_fn3	fpu_div;
78fpu_fn3	fpu_floor_l;
79fpu_fn3	fpu_floor_w;
80int	fpu_int_l(struct proc *, struct trapframe *, uint, uint, uint, uint,
81	    uint);
82int	fpu_int_w(struct proc *, struct trapframe *, uint, uint, uint, uint,
83	    uint);
84fpu_fn4	fpu_madd;
85fpu_fn4	fpu_msub;
86fpu_fn3	fpu_mov;
87fpu_fn3	fpu_movcf;
88fpu_fn3	fpu_movn;
89fpu_fn3	fpu_movz;
90fpu_fn3	fpu_mul;
91fpu_fn3	fpu_neg;
92fpu_fn4	fpu_nmadd;
93fpu_fn4	fpu_nmsub;
94fpu_fn3	fpu_recip;
95fpu_fn3	fpu_round_l;
96fpu_fn3	fpu_round_w;
97fpu_fn3	fpu_rsqrt;
98fpu_fn3	fpu_sqrt;
99fpu_fn3	fpu_sub;
100fpu_fn3	fpu_trunc_l;
101fpu_fn3	fpu_trunc_w;
102
/*
 * Operand format codes, as found in the `fmt' and `fmt3' opcode fields.
 */
#define	FMT_S	0	/* operand handled as float32 */
#define	FMT_D	1	/* operand handled as float64 */
#define	FMT_W	4	/* operand handled as 32-bit integer */
#define	FMT_L	5	/* operand handled as 64-bit integer */
110
/*
 * Inlines from softfloat-specialize.h which are not made public, needed
 * for fpu_abs.
 *
 * The argument is parenthesized and narrowed to 32 bits, so that any
 * expression (not only a plain float32 variable) can be passed safely.
 * Note that the argument is evaluated more than once by
 * float32_is_signaling_nan(); do not pass expressions with side effects.
 */
#define	float32_is_nan(a) \
	(0xff000000 < ((uint32_t)(a) << 1))
#define	float32_is_signaling_nan(a) \
	(((((uint32_t)(a) >> 22) & 0x1ff) == 0x1fe) && \
	    ((uint32_t)(a) & 0x003fffff))
119
/*
 * Precomputed results of intXX_to_floatXX(1).
 * The replacement lists are fully parenthesized so that the macros
 * behave as a single operand wherever they are expanded.
 */
#define	ONE_F32	((float32)(SNG_EXP_BIAS << SNG_FRACBITS))
#define	ONE_F64	((float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS))
125
126static inline uint32_t
127getfsr(void)
128{
129	uint32_t fsr;
130
131	__asm__ volatile (
132	"	.set	push\n"
133	"	.set	hardfloat\n"
134	"	cfc1	%0, $31\n"	/* stall until FPU done */
135	"	cfc1	%0, $31\n"	/* now get status */
136	"	.set	pop\n"
137	: "=r" (fsr));
138	return fsr;
139}
140
141static inline void
142setfsr(uint32_t fsr)
143{
144	__asm__ volatile (
145	"	.set	push\n"
146	"	.set	hardfloat\n"
147	"	ctc1	%0, $31\n"
148	"	.set	pop\n"
149	: : "r" (fsr));
150}
151
152/*
153 * Handle a floating-point exception.
154 */
155void
156MipsFPTrap(struct trapframe *tf)
157{
158	struct cpu_info *ci = curcpu();
159	struct proc *p = ci->ci_curproc;
160	union sigval sv;
161	vaddr_t pc;
162	register_t sr;
163	uint32_t fsr, excbits;
164	uint32_t branch = 0;
165	uint32_t insn;
166	InstFmt inst;
167	int sig = 0;
168	int fault_type = SI_NOINFO;
169	int update_pcb = 0;
170	int emulate = 0;
171	int skip_insn = 1;
172
173	KDASSERT(tf == p->p_md.md_regs);
174
175	pc = (vaddr_t)tf->pc;
176	if (tf->cause & CR_BR_DELAY)
177		pc += 4;
178
179	if (CPU_HAS_FPU(ci)) {
180		/*
181		 * Enable FPU, and read its status register.
182		 */
183
184		sr = getsr();
185		setsr(sr | SR_COP_1_BIT);
186		fsr = getfsr();
187
188		/*
189		 * If this is not an unimplemented operation, but a genuine
190		 * FPU exception, signal the process.
191		 */
192
193		if ((fsr & FPCSR_C_E) == 0) {
194			sig = SIGFPE;
195			goto deliver;
196		}
197	} else {
198#ifdef CPU_OCTEON
199		/*
200		 * SR_FR_32 is hardwired to zero on Octeon; make sure it is
201		 * set in the emulation view of the FPU state.
202		 */
203		tf->sr |= SR_FR_32;
204#endif
205		fsr = tf->fsr;
206	}
207
208	/*
209	 * Get the faulting instruction.  This should not fail, and
210	 * if it does, it's probably not your lucky day.
211	 */
212
213	if (copyinsn(p, pc, &insn) != 0) {
214		sig = SIGBUS;
215		fault_type = BUS_OBJERR;
216		sv.sival_ptr = (void *)pc;
217		goto deliver;
218	}
219	inst = *(InstFmt *)&insn;
220
221	if (tf->cause & CR_BR_DELAY) {
222		if (copyinsn(p, tf->pc, &branch) != 0) {
223			sig = SIGBUS;
224			fault_type = BUS_OBJERR;
225			sv.sival_ptr = (void *)tf->pc;
226			goto deliver;
227		}
228	}
229
230	/*
231	 * Emulate the instruction.
232	 */
233
234#ifdef DEBUG
235#ifdef DDB
236	printf("%s: unimplemented FPU completion, fsr 0x%08x\n0x%lx: ",
237	    p->p_p->ps_comm, fsr, pc);
238	dbmd_print_insn(insn, pc, printf);
239#else
240	printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
241	    p->p_p->ps_comm, insn, fsr);
242#endif
243#endif
244
245	switch (inst.FRType.op) {
246	default:
247		/*
248		 * Not a FPU instruction.
249		 */
250		break;
251#ifdef FPUEMUL
252	case OP_SPECIAL:
253		switch (inst.FRType.func) {
254		default:
255			/*
256			 * Not a FPU instruction.
257			 */
258			break;
259		case OP_MOVCI:
260			/*
261			 * This instruction should not require emulation,
262			 * unless there is no FPU.
263			 */
264			if (!CPU_HAS_FPU(ci))
265				emulate = 1;
266			break;
267		}
268		break;
269	case OP_LDC1:
270	case OP_LWC1:
271	case OP_SDC1:
272	case OP_SWC1:
273		/*
274		 * These instructions should not require emulation,
275		 * unless there is no FPU.
276		 */
277		if (!CPU_HAS_FPU(ci))
278			emulate = 1;
279		break;
280#endif
281	case OP_COP1:
282		switch (inst.RType.rs) {
283		case OP_BC:
284			skip_insn = 0;
285			/* FALLTHROUGH */
286		case OP_MF:
287		case OP_DMF:
288		case OP_CF:
289		case OP_MT:
290		case OP_DMT:
291		case OP_CT:
292			/*
293			 * These instructions should not require emulation,
294			 * unless there is no FPU.
295			 */
296			if (!CPU_HAS_FPU(ci))
297				emulate = 1;
298			break;
299		default:
300			emulate = 1;
301			break;
302		}
303		break;
304	case OP_COP1X:
305		switch (inst.FQType.op4) {
306		default:
307			switch (inst.FRType.func) {
308#ifdef FPUEMUL
309			case OP_LDXC1:
310			case OP_LWXC1:
311			case OP_SDXC1:
312			case OP_SWXC1:
313			case OP_PREFX:
314				/*
315				 * These instructions should not require
316				 * emulation, unless there is no FPU.
317				 */
318				if (!CPU_HAS_FPU(ci))
319					emulate = 1;
320				break;
321#endif
322			default:
323				/*
324				 * Not a valid instruction.
325				 */
326				break;
327			}
328			break;
329		case OP_MADD:
330		case OP_MSUB:
331		case OP_NMADD:
332		case OP_NMSUB:
333			emulate = 1;
334			break;
335		}
336		break;
337	}
338
339	if (emulate) {
340		if (CPU_HAS_FPU(ci)) {
341			KASSERT(p == ci->ci_fpuproc);
342			save_fpu();
343		}
344
345		update_pcb = 1;
346
347		sig = fpu_emulate(p, tf, insn, &sv);
348		/* reload fsr, possibly modified by softfloat code */
349		fsr = tf->fsr;
350		if (sig == 0) {
351			/* raise SIGFPE if necessary */
352			excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
353			excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
354			if (excbits != 0)
355				sig = SIGFPE;
356		}
357	} else {
358		sig = SIGILL;
359		fault_type = ILL_ILLOPC;
360	}
361
362deliver:
363	switch (sig) {
364	case SIGFPE:
365		excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
366		excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
367		if (excbits & FP_X_INV)
368			fault_type = FPE_FLTINV;
369		else if (excbits & FP_X_DZ)
370			fault_type = FPE_INTDIV;
371		else if (excbits & FP_X_OFL)
372			fault_type = FPE_FLTUND;
373		else if (excbits & FP_X_UFL)
374			fault_type = FPE_FLTOVF;
375		else /* if (excbits & FP_X_IMP) */
376			fault_type = FPE_FLTRES;
377
378		break;
379#ifdef FPUEMUL
380	case SIGBUS:
381		if (fault_type == SI_NOINFO)
382			fault_type = BUS_ADRALN;
383		break;
384	case SIGSEGV:
385		if (fault_type == SI_NOINFO)
386			fault_type = SEGV_MAPERR;
387		break;
388#endif
389	}
390
391	/*
392	 * Skip the instruction, unless we are delivering SIGILL.
393	 */
394	if (CPU_HAS_FPU(ci) || skip_insn) {
395		if (sig != SIGILL) {
396			if (tf->cause & CR_BR_DELAY) {
397				/*
398				 * Note that it doesn't matter, at this point,
399				 * that we pass the updated FSR value, as it is
400				 * only used to decide whether to branch or not
401				 * if the faulting instruction was BC1[FT].
402				 */
403				tf->pc = MipsEmulateBranch(tf, tf->pc, fsr,
404				    branch);
405			} else
406				tf->pc += 4;
407		}
408	}
409
410	/*
411	 * Update the FPU status register.
412	 * We need to make sure that this will not cause an exception
413	 * in kernel mode.
414	 */
415
416	/* propagate raised exceptions to the sticky bits */
417	fsr &= ~FPCSR_C_E;
418	excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
419	fsr |= excbits << FPCSR_F_SHIFT;
420	/* clear all exception sources */
421	fsr &= ~FPCSR_C_MASK;
422	if (update_pcb)
423		tf->fsr = fsr;
424
425	if (CPU_HAS_FPU(ci)) {
426		setfsr(fsr);
427		/* disable fpu before returning to trap() */
428		setsr(sr);
429	}
430
431	if (sig != 0) {
432		if (sig != SIGBUS && sig != SIGSEGV)
433			sv.sival_ptr = (void *)pc;
434		trapsignal(p, sig, 0, fault_type, sv);
435	}
436}
437
438/*
439 * Emulate an FPU instruction.  The FPU register set has been saved in the
440 * current PCB, and is pointed to by the trap frame.
441 */
442int
443fpu_emulate(struct proc *p, struct trapframe *tf, uint32_t insn,
444    union sigval *sv)
445{
446	InstFmt inst;
447
448	tf->zero = 0;	/* not written by trap code */
449
450	inst = *(InstFmt *)&insn;
451
452	if (CPU_HAS_FPU(p->p_cpu)) {
453		switch (inst.FRType.op) {
454		default:
455			break;
456		case OP_COP1:
457			return fpu_emulate_cop1(p, tf, insn);
458		case OP_COP1X:
459			return fpu_emulate_cop1x(p, tf, insn);
460		}
461
462		return SIGILL;
463	}
464
465#ifdef FPUEMUL
466	switch (inst.FRType.op) {
467	default:
468		break;
469	case OP_SPECIAL:
470		return nofpu_emulate_movci(tf, insn);
471	case OP_LDC1:
472	case OP_LWC1:
473	case OP_SDC1:
474	case OP_SWC1:
475		return nofpu_emulate_loadstore(p, tf, insn, sv);
476	case OP_COP1:
477		switch (inst.RType.rs) {
478		case OP_MF:
479		case OP_DMF:
480		case OP_CF:
481		case OP_MT:
482		case OP_DMT:
483		case OP_CT:
484		case OP_BC:
485			return nofpu_emulate_cop1(p, tf, insn, sv);
486		default:
487			return fpu_emulate_cop1(p, tf, insn);
488		}
489		break;
490	case OP_COP1X:
491		switch (inst.FQType.op4) {
492		default:
493			switch (inst.FRType.func) {
494			case OP_LDXC1:
495			case OP_LWXC1:
496			case OP_SDXC1:
497			case OP_SWXC1:
498			case OP_PREFX:
499				return nofpu_emulate_cop1x(p, tf, insn, sv);
500			default:
501				break;
502			}
503			break;
504		case OP_MADD:
505		case OP_MSUB:
506		case OP_NMADD:
507		case OP_NMSUB:
508			return fpu_emulate_cop1x(p, tf, insn);
509		}
510	}
511#endif
512
513	return SIGILL;
514}
515
516/*
517 * Emulate a COP1 FPU instruction.
518 */
519int
520fpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn)
521{
522	InstFmt inst;
523	uint ft, fs, fd;
524	fpu_fn3 *fpu_op;
525	static fpu_fn3 *const fpu_ops1[1 << 6] = {
526		fpu_add,		/* 0x00 */
527		fpu_sub,
528		fpu_mul,
529		fpu_div,
530		fpu_sqrt,
531		fpu_abs,
532		fpu_mov,
533		fpu_neg,
534		fpu_round_l,		/* 0x08 */
535		fpu_trunc_l,
536		fpu_ceil_l,
537		fpu_floor_l,
538		fpu_round_w,
539		fpu_trunc_w,
540		fpu_ceil_w,
541		fpu_floor_w,
542		NULL,			/* 0x10 */
543		fpu_movcf,
544		fpu_movz,
545		fpu_movn,
546		NULL,
547		fpu_recip,
548		fpu_rsqrt,
549		NULL,
550		NULL,			/* 0x18 */
551		NULL,
552		NULL,
553		NULL,
554		NULL,
555		NULL,
556		NULL,
557		NULL,
558		fpu_cvt_s,		/* 0x20 */
559		fpu_cvt_d,
560		NULL,
561		NULL,
562		fpu_cvt_w,
563		fpu_cvt_l,
564		NULL,
565		NULL,
566		NULL,			/* 0x28 */
567		NULL,
568		NULL,
569		NULL,
570		NULL,
571		NULL,
572		NULL,
573		NULL,
574		(fpu_fn3 *)fpu_c,	/* 0x30 */
575		(fpu_fn3 *)fpu_c,
576		(fpu_fn3 *)fpu_c,
577		(fpu_fn3 *)fpu_c,
578		(fpu_fn3 *)fpu_c,
579		(fpu_fn3 *)fpu_c,
580		(fpu_fn3 *)fpu_c,
581		(fpu_fn3 *)fpu_c,
582		(fpu_fn3 *)fpu_c,	/* 0x38 */
583		(fpu_fn3 *)fpu_c,
584		(fpu_fn3 *)fpu_c,
585		(fpu_fn3 *)fpu_c,
586		(fpu_fn3 *)fpu_c,
587		(fpu_fn3 *)fpu_c,
588		(fpu_fn3 *)fpu_c,
589		(fpu_fn3 *)fpu_c
590	};
591
592	inst = *(InstFmt *)&insn;
593
594	/*
595	 * Check for valid function code.
596	 */
597
598	fpu_op = fpu_ops1[inst.FRType.func];
599	if (fpu_op == NULL)
600		return SIGILL;
601
602	/*
603	 * Check for valid format.  FRType assumes bit 25 is always set,
604	 * so we need to check for it explicitly.
605	 */
606
607	if ((insn & (1 << 25)) == 0)
608		return SIGILL;
609	switch (inst.FRType.fmt) {
610	default:
611		return SIGILL;
612	case FMT_S:
613	case FMT_D:
614	case FMT_W:
615	case FMT_L:
616		break;
617	}
618
619	/*
620	 * Check for valid register values. Only even-numbered registers
621	 * can be used if the FR bit is clear in coprocessor 0 status
622	 * register.
623	 *
624	 * Note that c.cond does not specify a register number in the fd
625	 * field, but the fd field must have zero in its low two bits, so
626	 * the test will not reject valid c.cond instructions.
627	 */
628
629	ft = inst.FRType.ft;
630	fs = inst.FRType.fs;
631	fd = inst.FRType.fd;
632	if ((tf->sr & SR_FR_32) == 0) {
633		if ((ft | fs | fd) & 1)
634			return SIGILL;
635	}
636
637	/*
638	 * Finally dispatch to the proper routine.
639	 */
640
641	if (fpu_op == (fpu_fn3 *)&fpu_c)
642		return
643		    fpu_c(p, tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func);
644	else
645		return (*fpu_op)(p, tf, inst.FRType.fmt, ft, fs, fd);
646}
647
648/*
649 * Emulate a COP1X FPU instruction.
650 */
651int
652fpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn)
653{
654	InstFmt inst;
655	uint fr, ft, fs, fd;
656	fpu_fn4 *fpu_op;
657	static fpu_fn4 *const fpu_ops1x[1 << 3] = {
658		NULL,
659		NULL,
660		NULL,
661		NULL,
662		fpu_madd,
663		fpu_msub,
664		fpu_nmadd,
665		fpu_nmsub
666	};
667
668	inst = *(InstFmt *)&insn;
669
670	/*
671	 * Check for valid function code.
672	 */
673
674	fpu_op = fpu_ops1x[inst.FQType.op4];
675	if (fpu_op == NULL)
676		return SIGILL;
677
678	/*
679	 * Check for valid format.
680	 */
681
682	switch (inst.FQType.fmt3) {
683	default:
684		return SIGILL;
685	case FMT_S:
686	case FMT_D:
687	case FMT_W:
688	case FMT_L:
689		break;
690	}
691
692	/*
693	 * Check for valid register values. Only even-numbered registers
694	 * can be used if the FR bit is clear in coprocessor 0 status
695	 * register.
696	 */
697
698	fr = inst.FQType.fr;
699	ft = inst.FQType.ft;
700	fs = inst.FQType.fs;
701	fd = inst.FQType.fd;
702	if ((tf->sr & SR_FR_32) == 0) {
703		if ((fr | ft | fs | fd) & 1)
704			return SIGILL;
705	}
706
707	/*
708	 * Finally dispatch to the proper routine.
709	 */
710
711	return (*fpu_op)(p, tf, inst.FRType.fmt, fr, ft, fs, fd);
712}
713
714/*
715 * Load a floating-point argument according to the specified format.
716 */
717uint64_t
718fpu_load(struct proc *p, struct trapframe *tf, uint fmt, uint regno)
719{
720	uint64_t tmp, tmp2;
721
722	tmp = ((uint64_t *)p->p_md.md_regs)[FPBASE + regno];
723	if (tf->sr & SR_FR_32) {
724		switch (fmt) {
725		case FMT_D:
726		case FMT_L:
727			break;
728		case FMT_S:
729		case FMT_W:
730			tmp &= 0xffffffff;
731			break;
732		}
733	} else {
734		tmp &= 0xffffffff;
735		switch (fmt) {
736		case FMT_D:
737		case FMT_L:
738			/* caller has enforced regno is even */
739			tmp2 =
740			    ((uint64_t *)p->p_md.md_regs)[FPBASE + regno + 1];
741			tmp |= tmp2 << 32;
742			break;
743		case FMT_S:
744		case FMT_W:
745			break;
746		}
747	}
748
749	return tmp;
750}
751
752/*
753 * Store a floating-point result according to the specified format.
754 */
755void
756fpu_store(struct proc *p, struct trapframe *tf, uint fmt, uint regno,
757    uint64_t rslt)
758{
759	if (tf->sr & SR_FR_32) {
760		((uint64_t *)p->p_md.md_regs)[FPBASE + regno] = rslt;
761	} else {
762		/* caller has enforced regno is even */
763		((uint64_t *)p->p_md.md_regs)[FPBASE + regno] =
764		    rslt & 0xffffffff;
765		((uint64_t *)p->p_md.md_regs)[FPBASE + regno + 1] =
766		    (rslt >> 32) & 0xffffffff;
767	}
768}
769
770/*
771 * Integer conversion
772 */
773
774int
775fpu_int_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
776    uint fd, uint rm)
777{
778	uint64_t raw;
779	uint32_t oldrm;
780
781	if (ft != 0)
782		return SIGILL;
783	if (fmt != FMT_S && fmt != FMT_D)
784		return SIGILL;
785
786	raw = fpu_load(p, tf, fmt, fs);
787
788	/* round towards required mode */
789	oldrm = tf->fsr & FPCSR_RM_MASK;
790	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
791	if (fmt == FMT_S)
792		raw = float32_to_int64((float32)raw);
793	else
794		raw = float64_to_int64((float64)raw);
795	/* restore rounding mode */
796	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
797
798	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
799		fpu_store(p, tf, fmt, fd, raw);
800
801	return 0;
802}
803
804int
805fpu_int_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
806    uint fd, uint rm)
807{
808	uint64_t raw;
809	uint32_t oldrm;
810
811	if (ft != 0)
812		return SIGILL;
813	if (fmt != FMT_S && fmt != FMT_D)
814		return SIGILL;
815
816	raw = fpu_load(p, tf, fmt, fs);
817
818	/* round towards required mode */
819	oldrm = tf->fsr & FPCSR_RM_MASK;
820	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
821	if (fmt == FMT_S)
822		raw = float32_to_int32((float32)raw);
823	else
824		raw = float64_to_int32((float64)raw);
825	/* restore rounding mode */
826	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
827
828	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
829		fpu_store(p, tf, fmt, fd, raw);
830
831	return 0;
832}
833
834/*
835 * FPU Instruction emulation
836 */
837
838int
839fpu_abs(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
840    uint fd)
841{
842	uint64_t raw;
843
844	if (ft != 0)
845		return SIGILL;
846	if (fmt != FMT_S && fmt != FMT_D)
847		return SIGILL;
848
849	raw = fpu_load(p, tf, fmt, fs);
850	/* clear sign bit unless NaN */
851	if (fmt == FMT_S) {
852		float32 f32 = (float32)raw;
853		if (float32_is_nan(f32)) {
854			float_set_invalid();
855		} else {
856			f32 &= ~(1L << 31);
857			raw = (uint64_t)f32;
858		}
859	} else {
860		float64 f64 = (float64)raw;
861		if (float64_is_nan(f64)) {
862			float_set_invalid();
863		} else {
864			f64 &= ~(1L << 63);
865			raw = (uint64_t)f64;
866		}
867	}
868	fpu_store(p, tf, fmt, fd, raw);
869
870	return 0;
871}
872
873int
874fpu_add(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
875    uint fd)
876{
877	uint64_t raw1, raw2, rslt;
878
879	if (fmt != FMT_S && fmt != FMT_D)
880		return SIGILL;
881
882	raw1 = fpu_load(p, tf, fmt, fs);
883	raw2 = fpu_load(p, tf, fmt, ft);
884	if (fmt == FMT_S) {
885		float32 f32 = float32_add((float32)raw1, (float32)raw2);
886		rslt = (uint64_t)f32;
887	} else {
888		float64 f64 = float64_add((float64)raw1, (float64)raw2);
889		rslt = (uint64_t)f64;
890	}
891	fpu_store(p, tf, fmt, fd, rslt);
892
893	return 0;
894}
895
896int
897fpu_c(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
898    uint fd, uint op)
899{
900	uint64_t raw1, raw2;
901	uint cc, lt, eq, uo;
902
903	if ((fd & 0x03) != 0)
904		return SIGILL;
905	if (fmt != FMT_S && fmt != FMT_D)
906		return SIGILL;
907
908	lt = eq = uo = 0;
909	cc = fd >> 2;
910
911	raw1 = fpu_load(p, tf, fmt, fs);
912	raw2 = fpu_load(p, tf, fmt, ft);
913
914	if (fmt == FMT_S) {
915		float32 f32a = (float32)raw1;
916		float32 f32b = (float32)raw2;
917		if (float32_is_nan(f32a)) {
918			uo = 1 << 0;
919			if (float32_is_signaling_nan(f32a))
920				op |= 0x08;	/* force invalid exception */
921		}
922		if (float32_is_nan(f32b)) {
923			uo = 1 << 0;
924			if (float32_is_signaling_nan(f32b))
925				op |= 0x08;	/* force invalid exception */
926		}
927		if (uo == 0) {
928			if (float32_eq(f32a, f32b))
929				eq = 1 << 1;
930			else if (float32_lt(f32a, f32b))
931				lt = 1 << 2;
932		}
933	} else {
934		float64 f64a = (float64)raw1;
935		float64 f64b = (float64)raw2;
936		if (float64_is_nan(f64a)) {
937			uo = 1 << 0;
938			if (float64_is_signaling_nan(f64a))
939				op |= 0x08;	/* force invalid exception */
940		}
941		if (float64_is_nan(f64b)) {
942			uo = 1 << 0;
943			if (float64_is_signaling_nan(f64b))
944				op |= 0x08;	/* force invalid exception */
945		}
946		if (uo == 0) {
947			if (float64_eq(f64a, f64b))
948				eq = 1 << 1;
949			else if (float64_lt(f64a, f64b))
950				lt = 1 << 2;
951		}
952	}
953
954	if (uo && (op & 0x08)) {
955		float_set_invalid();
956		if (tf->fsr & FPCSR_E_V) {
957			/* comparison result intentionally not written */
958			goto skip;
959		}
960	}
961
962	if ((uo | eq | lt) & op)
963		tf->fsr |= FPCSR_CONDVAL(cc);
964	else
965		tf->fsr &= ~FPCSR_CONDVAL(cc);
966skip:
967
968	return 0;
969}
970
971int
972fpu_ceil_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
973    uint fd)
974{
975	/* round towards positive infinity */
976	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RP);
977}
978
979int
980fpu_ceil_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
981    uint fd)
982{
983	/* round towards positive infinity */
984	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RP);
985}
986
987int
988fpu_cvt_d(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
989    uint fd)
990{
991	uint64_t raw;
992
993	if (ft != 0)
994		return SIGILL;
995	if (fmt == FMT_D)
996		return SIGILL;
997
998	raw = fpu_load(p, tf, fmt, fs);
999	switch (fmt) {
1000	case FMT_L:
1001		raw = int64_to_float64((int64_t)raw);
1002		break;
1003	case FMT_S:
1004		raw = float32_to_float64((float32)raw);
1005		break;
1006	case FMT_W:
1007		raw = int32_to_float64((int32_t)raw);
1008		break;
1009	}
1010	fpu_store(p, tf, fmt, fd, raw);
1011
1012	return 0;
1013}
1014
1015int
1016fpu_cvt_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1017    uint fd)
1018{
1019	uint64_t raw;
1020	uint32_t rm;
1021
1022	if (ft != 0)
1023		return SIGILL;
1024	if (fmt != FMT_S && fmt != FMT_D)
1025		return SIGILL;
1026
1027	rm = tf->fsr & FPCSR_RM_MASK;
1028	raw = fpu_load(p, tf, fmt, fs);
1029	if (fmt == FMT_D) {
1030		if (rm == FP_RZ)
1031			raw = float64_to_int64_round_to_zero((float64)raw);
1032		else
1033			raw = float64_to_int64((float64)raw);
1034	} else {
1035		if (rm == FP_RZ)
1036			raw = float32_to_int64_round_to_zero((float32)raw);
1037		else
1038			raw = float32_to_int64((float32)raw);
1039	}
1040	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1041		fpu_store(p, tf, fmt, fd, raw);
1042
1043	return 0;
1044}
1045
1046int
1047fpu_cvt_s(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1048    uint fd)
1049{
1050	uint64_t raw;
1051
1052	if (ft != 0)
1053		return SIGILL;
1054	if (fmt == FMT_S)
1055		return SIGILL;
1056
1057	raw = fpu_load(p, tf, fmt, fs);
1058	switch (fmt) {
1059	case FMT_D:
1060		raw = float64_to_float32((float64)raw);
1061		break;
1062	case FMT_L:
1063		raw = int64_to_float32((int64_t)raw);
1064		break;
1065	case FMT_W:
1066		raw = int32_to_float32((int32_t)raw);
1067		break;
1068	}
1069	fpu_store(p, tf, fmt, fd, raw);
1070
1071	return 0;
1072}
1073
1074int
1075fpu_cvt_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1076    uint fd)
1077{
1078	uint64_t raw;
1079	uint32_t rm;
1080
1081	if (ft != 0)
1082		return SIGILL;
1083	if (fmt != FMT_S && fmt != FMT_D)
1084		return SIGILL;
1085
1086	rm = tf->fsr & FPCSR_RM_MASK;
1087	raw = fpu_load(p, tf, fmt, fs);
1088	if (fmt == FMT_D) {
1089		if (rm == FP_RZ)
1090			raw = float64_to_int32_round_to_zero((float64)raw);
1091		else
1092			raw = float64_to_int32((float64)raw);
1093	} else {
1094		if (rm == FP_RZ)
1095			raw = float32_to_int32_round_to_zero((float32)raw);
1096		else
1097			raw = float32_to_int32((float32)raw);
1098	}
1099	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1100		fpu_store(p, tf, fmt, fd, raw);
1101
1102	return 0;
1103}
1104
1105int
1106fpu_div(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1107    uint fd)
1108{
1109	uint64_t raw1, raw2, rslt;
1110
1111	if (fmt != FMT_S && fmt != FMT_D)
1112		return SIGILL;
1113
1114	raw1 = fpu_load(p, tf, fmt, fs);
1115	raw2 = fpu_load(p, tf, fmt, ft);
1116	if (fmt == FMT_S) {
1117		float32 f32 = float32_div((float32)raw1, (float32)raw2);
1118		rslt = (uint64_t)f32;
1119	} else {
1120		float64 f64 = float64_div((float64)raw1, (float64)raw2);
1121		rslt = (uint64_t)f64;
1122	}
1123	fpu_store(p, tf, fmt, fd, rslt);
1124
1125	return 0;
1126}
1127
1128int
1129fpu_floor_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1130    uint fd)
1131{
1132	/* round towards negative infinity */
1133	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RM);
1134}
1135
1136int
1137fpu_floor_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1138    uint fd)
1139{
1140	/* round towards negative infinity */
1141	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RM);
1142}
1143
1144int
1145fpu_madd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1146    uint fs, uint fd)
1147{
1148	uint64_t raw1, raw2, raw3, rslt;
1149
1150	if (fmt != FMT_S && fmt != FMT_D)
1151		return SIGILL;
1152
1153	raw1 = fpu_load(p, tf, fmt, fs);
1154	raw2 = fpu_load(p, tf, fmt, ft);
1155	raw3 = fpu_load(p, tf, fmt, fr);
1156	if (fmt == FMT_S) {
1157		float32 f32 = float32_add(
1158		    float32_mul((float32)raw1, (float32)raw2),
1159		    (float32)raw3);
1160		rslt = (uint64_t)f32;
1161	} else {
1162		float64 f64 = float64_add(
1163		    float64_mul((float64)raw1, (float64)raw2),
1164		    (float64)raw3);
1165		rslt = (uint64_t)f64;
1166	}
1167	fpu_store(p, tf, fmt, fd, rslt);
1168
1169	return 0;
1170}
1171
1172int
1173fpu_mov(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1174    uint fd)
1175{
1176	uint64_t raw;
1177
1178	if (ft != 0)
1179		return SIGILL;
1180	if (fmt != FMT_S && fmt != FMT_D)
1181		return SIGILL;
1182
1183	raw = fpu_load(p, tf, fmt, fs);
1184	fpu_store(p, tf, fmt, fd, raw);
1185
1186	return 0;
1187}
1188
1189int
1190fpu_movcf(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1191    uint fd)
1192{
1193	uint64_t raw;
1194	uint cc, istf;
1195	int condition;
1196
1197	if ((ft & 0x02) != 0)
1198		return SIGILL;
1199	cc = ft >> 2;
1200	if (fmt != FMT_S && fmt != FMT_D)
1201		return SIGILL;
1202
1203	condition = tf->fsr & FPCSR_CONDVAL(cc);
1204	istf = ft & COPz_BC_TF_MASK;
1205	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1206		raw = fpu_load(p, tf, fmt, fs);
1207		fpu_store(p, tf, fmt, fd, raw);
1208	}
1209
1210	return 0;
1211}
1212
1213int
1214fpu_movn(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1215    uint fd)
1216{
1217	register_t *regs = (register_t *)tf;
1218	uint64_t raw;
1219
1220	if (fmt != FMT_S && fmt != FMT_D)
1221		return SIGILL;
1222
1223	if (ft != ZERO && regs[ft] != 0) {
1224		raw = fpu_load(p, tf, fmt, fs);
1225		fpu_store(p, tf, fmt, fd, raw);
1226	}
1227
1228	return 0;
1229}
1230
1231int
1232fpu_movz(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1233    uint fd)
1234{
1235	register_t *regs = (register_t *)tf;
1236	uint64_t raw;
1237
1238	if (fmt != FMT_S && fmt != FMT_D)
1239		return SIGILL;
1240
1241	if (ft == ZERO || regs[ft] == 0) {
1242		raw = fpu_load(p, tf, fmt, fs);
1243		fpu_store(p, tf, fmt, fd, raw);
1244	}
1245
1246	return 0;
1247}
1248
1249int
1250fpu_msub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1251    uint fs, uint fd)
1252{
1253	uint64_t raw1, raw2, raw3, rslt;
1254
1255	if (fmt != FMT_S && fmt != FMT_D)
1256		return SIGILL;
1257
1258	raw1 = fpu_load(p, tf, fmt, fs);
1259	raw2 = fpu_load(p, tf, fmt, ft);
1260	raw3 = fpu_load(p, tf, fmt, fr);
1261	if (fmt == FMT_S) {
1262		float32 f32 = float32_sub(
1263		    float32_mul((float32)raw1, (float32)raw2),
1264		    (float32)raw3);
1265		rslt = (uint64_t)f32;
1266	} else {
1267		float64 f64 = float64_sub(
1268		    float64_mul((float64)raw1, (float64)raw2),
1269		    (float64)raw3);
1270		rslt = (uint64_t)f64;
1271	}
1272	fpu_store(p, tf, fmt, fd, rslt);
1273
1274	return 0;
1275}
1276
1277int
1278fpu_mul(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1279    uint fd)
1280{
1281	uint64_t raw1, raw2, rslt;
1282
1283	if (fmt != FMT_S && fmt != FMT_D)
1284		return SIGILL;
1285
1286	raw1 = fpu_load(p, tf, fmt, fs);
1287	raw2 = fpu_load(p, tf, fmt, ft);
1288	if (fmt == FMT_S) {
1289		float32 f32 = float32_mul((float32)raw1, (float32)raw2);
1290		rslt = (uint64_t)f32;
1291	} else {
1292		float64 f64 = float64_mul((float64)raw1, (float64)raw2);
1293		rslt = (uint64_t)f64;
1294	}
1295	fpu_store(p, tf, fmt, fd, rslt);
1296
1297	return 0;
1298}
1299
1300int
1301fpu_neg(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1302    uint fd)
1303{
1304	uint64_t raw;
1305
1306	if (ft != 0)
1307		return SIGILL;
1308	if (fmt != FMT_S && fmt != FMT_D)
1309		return SIGILL;
1310
1311	raw = fpu_load(p, tf, fmt, fs);
1312	/* flip sign bit unless NaN */
1313	if (fmt == FMT_S) {
1314		float32 f32 = (float32)raw;
1315		if (float32_is_nan(f32)) {
1316			float_set_invalid();
1317		} else {
1318			f32 ^= 1L << 31;
1319			raw = (uint64_t)f32;
1320		}
1321	} else {
1322		float64 f64 = (float64)raw;
1323		if (float64_is_nan(f64)) {
1324			float_set_invalid();
1325		} else {
1326			f64 ^= 1L << 63;
1327			raw = (uint64_t)f64;
1328		}
1329	}
1330	fpu_store(p, tf, fmt, fd, raw);
1331
1332	return 0;
1333}
1334
1335int
1336fpu_nmadd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1337    uint fs, uint fd)
1338{
1339	uint64_t raw1, raw2, raw3, rslt;
1340
1341	if (fmt != FMT_S && fmt != FMT_D)
1342		return SIGILL;
1343
1344	raw1 = fpu_load(p, tf, fmt, fs);
1345	raw2 = fpu_load(p, tf, fmt, ft);
1346	raw3 = fpu_load(p, tf, fmt, fr);
1347	if (fmt == FMT_S) {
1348		float32 f32 = float32_add(
1349		    float32_mul((float32)raw1, (float32)raw2),
1350		    (float32)raw3);
1351		if (float32_is_nan(f32))
1352			float_set_invalid();
1353		else
1354			f32 ^= 1L << 31;
1355		rslt = (uint64_t)f32;
1356	} else {
1357		float64 f64 = float64_add(
1358		    float64_mul((float64)raw1, (float64)raw2),
1359		    (float64)raw3);
1360		if (float64_is_nan(f64))
1361			float_set_invalid();
1362		else
1363			f64 ^= 1L << 63;
1364		rslt = (uint64_t)f64;
1365	}
1366	fpu_store(p, tf, fmt, fd, rslt);
1367
1368	return 0;
1369}
1370
1371int
1372fpu_nmsub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1373    uint fs, uint fd)
1374{
1375	uint64_t raw1, raw2, raw3, rslt;
1376
1377	if (fmt != FMT_S && fmt != FMT_D)
1378		return SIGILL;
1379
1380	raw1 = fpu_load(p, tf, fmt, fs);
1381	raw2 = fpu_load(p, tf, fmt, ft);
1382	raw3 = fpu_load(p, tf, fmt, fr);
1383	if (fmt == FMT_S) {
1384		float32 f32 = float32_sub(
1385		    float32_mul((float32)raw1, (float32)raw2),
1386		    (float32)raw3);
1387		if (float32_is_nan(f32))
1388			float_set_invalid();
1389		else
1390			f32 ^= 1L << 31;
1391		rslt = (uint64_t)f32;
1392	} else {
1393		float64 f64 = float64_sub(
1394		    float64_mul((float64)raw1, (float64)raw2),
1395		    (float64)raw3);
1396		if (float64_is_nan(f64))
1397			float_set_invalid();
1398		else
1399			f64 ^= 1L << 63;
1400		rslt = (uint64_t)f64;
1401	}
1402	fpu_store(p, tf, fmt, fd, rslt);
1403
1404	return 0;
1405}
1406
1407int
1408fpu_recip(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1409    uint fd)
1410{
1411	uint64_t raw;
1412
1413	if (ft != 0)
1414		return SIGILL;
1415	if (fmt != FMT_S && fmt != FMT_D)
1416		return SIGILL;
1417
1418	raw = fpu_load(p, tf, fmt, fs);
1419	if (fmt == FMT_S) {
1420		float32 f32 = float32_div(ONE_F32, (float32)raw);
1421		raw = (uint64_t)f32;
1422	} else {
1423		float64 f64 = float64_div(ONE_F64, (float64)raw);
1424		raw = (uint64_t)f64;
1425	}
1426	fpu_store(p, tf, fmt, fd, raw);
1427
1428	return 0;
1429}
1430
1431int
1432fpu_round_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1433    uint fd)
1434{
1435	/* round towards nearest */
1436	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RN);
1437}
1438
1439int
1440fpu_round_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1441    uint fd)
1442{
1443	/* round towards nearest */
1444	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RN);
1445}
1446
1447int
1448fpu_rsqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1449    uint fd)
1450{
1451	uint64_t raw;
1452
1453	if (ft != 0)
1454		return SIGILL;
1455	if (fmt != FMT_S && fmt != FMT_D)
1456		return SIGILL;
1457
1458	raw = fpu_load(p, tf, fmt, fs);
1459	if (fmt == FMT_S) {
1460		float32 f32 = float32_sqrt((float32)raw);
1461		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1462		    (FPCSR_C_V | FPCSR_E_V))
1463			f32 = float32_div(ONE_F32, f32);
1464		raw = (uint64_t)f32;
1465	} else {
1466		float64 f64 = float64_sqrt((float64)raw);
1467		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1468		    (FPCSR_C_V | FPCSR_E_V))
1469			f64 = float64_div(ONE_F64, f64);
1470		raw = (uint64_t)f64;
1471	}
1472	fpu_store(p, tf, fmt, fd, raw);
1473
1474	return 0;
1475}
1476
1477int
1478fpu_sqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1479    uint fd)
1480{
1481	uint64_t raw;
1482
1483	if (ft != 0)
1484		return SIGILL;
1485	if (fmt != FMT_S && fmt != FMT_D)
1486		return SIGILL;
1487
1488	raw = fpu_load(p, tf, fmt, fs);
1489	if (fmt == FMT_S) {
1490		float32 f32 = float32_sqrt((float32)raw);
1491		raw = (uint64_t)f32;
1492	} else {
1493		float64 f64 = float64_sqrt((float64)raw);
1494		raw = (uint64_t)f64;
1495	}
1496	fpu_store(p, tf, fmt, fd, raw);
1497
1498	return 0;
1499}
1500
1501int
1502fpu_sub(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1503    uint fd)
1504{
1505	uint64_t raw1, raw2, rslt;
1506
1507	if (fmt != FMT_S && fmt != FMT_D)
1508		return SIGILL;
1509
1510	raw1 = fpu_load(p, tf, fmt, fs);
1511	raw2 = fpu_load(p, tf, fmt, ft);
1512	if (fmt == FMT_S) {
1513		float32 f32 = float32_sub((float32)raw1, (float32)raw2);
1514		rslt = (uint64_t)f32;
1515	} else {
1516		float64 f64 = float64_sub((float64)raw1, (float64)raw2);
1517		rslt = (uint64_t)f64;
1518	}
1519	fpu_store(p, tf, fmt, fd, rslt);
1520
1521	return 0;
1522}
1523
1524int
1525fpu_trunc_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1526    uint fd)
1527{
1528	/* round towards zero */
1529	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RZ);
1530}
1531
1532int
1533fpu_trunc_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1534    uint fd)
1535{
1536	/* round towards zero */
1537	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RZ);
1538}
1539
1540#ifdef FPUEMUL
1541
1542/*
1543 * Emulate a COP1 non-FPU instruction.
1544 */
1545int
1546nofpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn,
1547    union sigval *sv)
1548{
1549	register_t *regs = (register_t *)tf;
1550	InstFmt inst;
1551	int32_t cval;
1552
1553	inst = *(InstFmt *)&insn;
1554
1555	switch (inst.RType.rs) {
1556	case OP_MF:
1557		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1558			return SIGILL;
1559		if (inst.FRType.ft != ZERO)
1560			regs[inst.FRType.ft] = (int32_t)
1561			    ((uint64_t *)p->p_md.md_regs)
1562			      [FPBASE + inst.FRType.fs];
1563		break;
1564	case OP_DMF:
1565		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1566			return SIGILL;
1567		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1568			if (inst.FRType.ft != ZERO)
1569				regs[inst.FRType.ft] =
1570				    fpu_load(p, tf, FMT_L, inst.FRType.fs);
1571		}
1572		break;
1573	case OP_CF:
1574		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1575			return SIGILL;
1576		if (inst.FRType.ft != ZERO) {
1577			switch (inst.FRType.fs) {
1578			case 0:	/* FPC_ID */
1579				cval = MIPS_SOFT << 8;
1580				break;
1581			case 31: /* FPC_CSR */
1582				cval = (int32_t)tf->fsr;
1583				break;
1584			default:
1585				cval = 0;
1586				break;
1587			}
1588			regs[inst.FRType.ft] = (int64_t)cval;
1589		}
1590		break;
1591	case OP_MT:
1592		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1593			return SIGILL;
1594		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FRType.fs] =
1595		    (int32_t)regs[inst.FRType.ft];
1596		break;
1597	case OP_DMT:
1598		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1599			return SIGILL;
1600		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1601			fpu_store(p, tf, FMT_L, inst.FRType.fs,
1602			    regs[inst.FRType.ft]);
1603		}
1604		break;
1605	case OP_CT:
1606		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1607			return SIGILL;
1608		cval = (int32_t)regs[inst.FRType.ft];
1609		switch (inst.FRType.fs) {
1610		case 31: /* FPC_CSR */
1611			cval &= ~FPCSR_C_E;
1612			tf->fsr = cval;
1613			break;
1614		case 0:	/* FPC_ID */
1615		default:
1616			break;
1617		}
1618		break;
1619	case OP_BC:
1620	   {
1621		uint cc, nd, istf;
1622		int condition;
1623		vaddr_t dest;
1624		uint32_t dinsn;
1625
1626		cc = (inst.RType.rt & COPz_BC_CC_MASK) >> COPz_BC_CC_SHIFT;
1627		nd = inst.RType.rt & COPz_BCL_TF_MASK;
1628		istf = inst.RType.rt & COPz_BC_TF_MASK;
1629		condition = tf->fsr & FPCSR_CONDVAL(cc);
1630		if ((!condition && !istf) /*bc1f*/ ||
1631		    (condition && istf) /*bc1t*/) {
1632			/*
1633			 * Branch taken: if the delay slot is not a nop,
1634			 * copy the delay slot instruction to the dedicated
1635			 * relocation page, in order to be able to have the
1636			 * cpu process it and give control back to the
1637			 * kernel, for us to redirect to the branch
1638			 * destination.
1639			 */
1640			/* inline MipsEmulateBranch(tf, tf->pc, tf->fsr, insn)*/
1641			dest = tf->pc + 4 + ((short)inst.IType.imm << 2);
1642			if (copyinsn(p, tf->pc + 4, &dinsn) != 0) {
1643				sv->sival_ptr = (void *)(tf->pc + 4);
1644				return SIGSEGV;
1645			}
1646			if (dinsn == 0x00000000 /* nop */ ||
1647			    dinsn == 0x00000040 /* ssnop */) {
1648				tf->pc = dest;
1649			} else {
1650				if (fpe_branch_emulate(curproc, tf, dinsn,
1651				    dest) != 0)
1652					return SIGILL;
1653			}
1654		} else {
1655			/*
1656			 * Branch not taken: skip the instruction, and
1657			 * skip the delay slot if it was a `branch likely'
1658			 * instruction.
1659			 */
1660			tf->pc += 4;
1661			if (nd)
1662				tf->pc += 4;
1663		}
1664	    }
1665		break;
1666	}
1667
1668	return 0;
1669}
1670
1671/*
1672 * Emulate a COP1X non-FPU instruction.
1673 */
1674int
1675nofpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn,
1676    union sigval *sv)
1677{
1678	register_t *regs = (register_t *)tf;
1679	InstFmt inst;
1680	vaddr_t va;
1681	uint64_t ddata;
1682	uint32_t wdata;
1683
1684	inst = *(InstFmt *)&insn;
1685	switch (inst.FRType.func) {
1686	case OP_LDXC1:
1687		if (inst.FQType.fs != 0)
1688			return SIGILL;
1689		va = (vaddr_t)regs[inst.FQType.fr] +
1690		    (vaddr_t)regs[inst.FQType.ft];
1691		if ((va & 0x07) != 0) {
1692			sv->sival_ptr = (void *)va;
1693			return SIGBUS;
1694		}
1695		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1696			sv->sival_ptr = (void *)va;
1697			return SIGSEGV;
1698		}
1699		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fd & 1) == 0)
1700			fpu_store(p, tf, FMT_L, inst.FQType.fd, ddata);
1701		break;
1702	case OP_LWXC1:
1703		if (inst.FQType.fs != 0)
1704			return SIGILL;
1705		va = (vaddr_t)regs[inst.FQType.fr] +
1706		    (vaddr_t)regs[inst.FQType.ft];
1707		if ((va & 0x03) != 0) {
1708			sv->sival_ptr = (void *)va;
1709			return SIGBUS;
1710		}
1711		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1712			sv->sival_ptr = (void *)va;
1713			return SIGSEGV;
1714		}
1715		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fd] = wdata;
1716		break;
1717	case OP_SDXC1:
1718		if (inst.FQType.fd != 0)
1719			return SIGILL;
1720		va = (vaddr_t)regs[inst.FQType.fr] +
1721		    (vaddr_t)regs[inst.FQType.ft];
1722		if ((va & 0x07) != 0) {
1723			sv->sival_ptr = (void *)va;
1724			return SIGBUS;
1725		}
1726		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fs & 1) == 0)
1727			ddata = fpu_load(p, tf, FMT_L, inst.FQType.fs);
1728		else {
1729			/* undefined behaviour, don't expose stack content */
1730			ddata = 0;
1731		}
1732		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1733			sv->sival_ptr = (void *)va;
1734			return SIGSEGV;
1735		}
1736		break;
1737	case OP_SWXC1:
1738		if (inst.FQType.fd != 0)
1739			return SIGILL;
1740		va = (vaddr_t)regs[inst.FQType.fr] +
1741		    (vaddr_t)regs[inst.FQType.ft];
1742		if ((va & 0x03) != 0) {
1743			sv->sival_ptr = (void *)va;
1744			return SIGBUS;
1745		}
1746		wdata = ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fs];
1747		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1748			sv->sival_ptr = (void *)va;
1749			return SIGSEGV;
1750		}
1751		break;
1752	case OP_PREFX:
1753		/* nothing to do */
1754		break;
1755	}
1756
1757	return 0;
1758}
1759
1760/*
1761 * Emulate a load/store instruction on FPU registers.
1762 */
1763int
1764nofpu_emulate_loadstore(struct proc *p, struct trapframe *tf, uint32_t insn,
1765    union sigval *sv)
1766{
1767	register_t *regs = (register_t *)tf;
1768	InstFmt inst;
1769	vaddr_t va;
1770	uint64_t ddata;
1771	uint32_t wdata;
1772
1773	inst = *(InstFmt *)&insn;
1774	switch (inst.IType.op) {
1775	case OP_LDC1:
1776		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1777		if ((va & 0x07) != 0) {
1778			sv->sival_ptr = (void *)va;
1779			return SIGBUS;
1780		}
1781		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1782			sv->sival_ptr = (void *)va;
1783			return SIGSEGV;
1784		}
1785		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1786			fpu_store(p, tf, FMT_L, inst.IType.rt, ddata);
1787		break;
1788	case OP_LWC1:
1789		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1790		if ((va & 0x03) != 0) {
1791			sv->sival_ptr = (void *)va;
1792			return SIGBUS;
1793		}
1794		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1795			sv->sival_ptr = (void *)va;
1796			return SIGSEGV;
1797		}
1798		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.IType.rt] = wdata;
1799		break;
1800	case OP_SDC1:
1801		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1802		if ((va & 0x07) != 0) {
1803			sv->sival_ptr = (void *)va;
1804			return SIGBUS;
1805		}
1806		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1807			ddata = fpu_load(p, tf, FMT_L, inst.IType.rt);
1808		else {
1809			/* undefined behaviour, don't expose stack content */
1810			ddata = 0;
1811		}
1812		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1813			sv->sival_ptr = (void *)va;
1814			return SIGSEGV;
1815		}
1816		break;
1817	case OP_SWC1:
1818		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1819		if ((va & 0x03) != 0) {
1820			sv->sival_ptr = (void *)va;
1821			return SIGBUS;
1822		}
1823		wdata = ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.IType.rt];
1824		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1825			sv->sival_ptr = (void *)va;
1826			return SIGSEGV;
1827		}
1828		break;
1829	}
1830
1831	return 0;
1832}
1833
1834/*
1835 * Emulate MOVF and MOVT.
1836 */
1837int
1838nofpu_emulate_movci(struct trapframe *tf, uint32_t insn)
1839{
1840	register_t *regs = (register_t *)tf;
1841	InstFmt inst;
1842	uint cc, istf;
1843	int condition;
1844
1845	inst = *(InstFmt *)&insn;
1846	if ((inst.RType.rt & 0x02) != 0 || inst.RType.shamt != 0)
1847		return SIGILL;
1848
1849	cc = inst.RType.rt >> 2;
1850	istf = inst.RType.rt & COPz_BC_TF_MASK;
1851	condition = tf->fsr & FPCSR_CONDVAL(cc);
1852	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1853		if (inst.RType.rd != ZERO)
1854			regs[inst.RType.rd] = regs[inst.RType.rs];
1855	}
1856
1857	return 0;
1858}
1859
1860#endif	/* FPUEMUL */
1861