1/*	$OpenBSD: fp_emulate.c,v 1.4 2011/07/06 21:41:37 art Exp $	*/
2
3/*
4 * Copyright (c) 2010 Miodrag Vallat.
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19/*
20 * Floating Point completion/emulation code (MI softfloat code control engine).
21 *
22 * Supports all MIPS IV COP1 and COP1X floating-point instructions.
23 */
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/kernel.h>
28#include <sys/signalvar.h>
29
30#include <machine/cpu.h>
31#include <machine/fpu.h>
32#include <machine/frame.h>
33#include <machine/ieee.h>
34#include <machine/ieeefp.h>
35#include <machine/mips_opcode.h>
36#include <machine/regnum.h>
37
38#include <lib/libkern/softfloat.h>
39#if defined(DEBUG) && defined(DDB)
40#include <machine/db_machdep.h>
41#endif
42
43int	fpu_emulate(struct trap_frame *, uint32_t, union sigval *);
44int	fpu_emulate_cop1(struct trap_frame *, uint32_t);
45int	fpu_emulate_cop1x(struct trap_frame *, uint32_t);
46uint64_t
47	fpu_load(struct trap_frame *, uint, uint);
48void	fpu_store(struct trap_frame *, uint, uint, uint64_t);
49#ifdef FPUEMUL
50int	nofpu_emulate_cop1(struct trap_frame *, uint32_t, union sigval *);
51int	nofpu_emulate_cop1x(struct trap_frame *, uint32_t, union sigval *);
52int	nofpu_emulate_loadstore(struct trap_frame *, uint32_t, union sigval *);
53int	nofpu_emulate_movci(struct trap_frame *, uint32_t);
54#endif
55
56typedef	int (fpu_fn3)(struct trap_frame *, uint, uint, uint, uint);
57typedef	int (fpu_fn4)(struct trap_frame *, uint, uint, uint, uint, uint);
58fpu_fn3	fpu_abs;
59fpu_fn3	fpu_add;
60int	fpu_c(struct trap_frame *, uint, uint, uint, uint, uint);
61fpu_fn3	fpu_ceil_l;
62fpu_fn3	fpu_ceil_w;
63fpu_fn3	fpu_cvt_d;
64fpu_fn3	fpu_cvt_l;
65fpu_fn3	fpu_cvt_s;
66fpu_fn3	fpu_cvt_w;
67fpu_fn3	fpu_div;
68fpu_fn3	fpu_floor_l;
69fpu_fn3	fpu_floor_w;
70fpu_fn4	fpu_madd;
71fpu_fn4	fpu_msub;
72fpu_fn3	fpu_mov;
73fpu_fn3	fpu_movcf;
74fpu_fn3	fpu_movn;
75fpu_fn3	fpu_movz;
76fpu_fn3	fpu_mul;
77fpu_fn3	fpu_neg;
78fpu_fn4	fpu_nmadd;
79fpu_fn4	fpu_nmsub;
80fpu_fn3	fpu_recip;
81fpu_fn3	fpu_round_l;
82fpu_fn3	fpu_round_w;
83fpu_fn3	fpu_rsqrt;
84fpu_fn3	fpu_sqrt;
85fpu_fn3	fpu_sub;
86fpu_fn3	fpu_trunc_l;
87fpu_fn3	fpu_trunc_w;
88
89int	fpu_int_l(struct trap_frame *, uint, uint, uint, uint, uint);
90int	fpu_int_w(struct trap_frame *, uint, uint, uint, uint, uint);
91
92/*
93 * Encoding of operand format within opcodes `fmt' and `fmt3' fields.
94 */
95#define	FMT_S	0x00
96#define	FMT_D	0x01
97#define	FMT_W	0x04
98#define	FMT_L	0x05
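/*
 * These are the low bits of the architectural `fmt' values (S=16, D=17,
 * W=20, L=21): FRType.fmt does not include bit 25 of the instruction,
 * which fpu_emulate_cop1() checks separately.  For instance, add.d
 * encodes fmt 0x11, which FRType.fmt exposes as FMT_D.
 */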
99
100/*
101 * Inlines from softfloat-specialize.h which are not made public, needed
102 * for fpu_abs.
103 */
104#define	float32_is_nan(a) \
105	(0xff000000 < (a << 1))
106#define	float32_is_signaling_nan(a) \
107	((((a >> 22) & 0x1ff) == 0x1fe) && (a & 0x003fffff))
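/*
 * A NaN has an all-ones exponent and a non-zero fraction; a signaling
 * NaN additionally has the quiet bit (the most significant fraction bit)
 * clear, which is what the 0x1fe test above checks.
 */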
108
109/*
110 * Precomputed results of intXX_to_floatXX(1)
111 */
112#define	ONE_F32	(float32)(SNG_EXP_BIAS << SNG_FRACBITS)
113#define	ONE_F64	(float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS)
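/*
 * i.e. 1.0 in each format: sign clear, biased exponent equal to the bias,
 * all-zero fraction.  Used by fpu_recip() and fpu_rsqrt() below.
 */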
114
115/*
116 * Handle a floating-point exception.
117 */
118void
119MipsFPTrap(struct trap_frame *tf)
120{
121	struct cpu_info *ci = curcpu();
122	struct proc *p = ci->ci_curproc;
123	union sigval sv;
124	vaddr_t pc;
125	uint32_t fsr, excbits;
126	uint32_t insn;
127	InstFmt inst;
128	int sig = 0;
129	int fault_type = SI_NOINFO;
130	int update_pcb = 0;
131	int emulate = 0;
132#ifdef FPUEMUL
133	int skip_insn = 1;
134#else
135	uint32_t sr;
136#endif
137
138	KDASSERT(tf == p->p_md.md_regs);
139
140#ifndef FPUEMUL
141	/*
142	 * Enable FPU, and read its status register.
143	 */
144
145	sr = getsr();
146	setsr(sr | SR_COP_1_BIT);
147
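	/*
	 * The status register is read twice; the first read presumably only
	 * serves to let the freshly enabled coprocessor settle before a
	 * reliable value can be obtained.
	 */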
148	__asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr));
149	__asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr));
150
151	/*
152	 * If this is not an unimplemented operation, but a genuine
153	 * FPU exception, signal the process.
154	 */
155
156	if ((fsr & FPCSR_C_E) == 0) {
157		sig = SIGFPE;
158		goto deliver;
159	}
160#else
161#ifdef CPU_OCTEON
162	/*
163	 * SR_FR_32 is hardwired to zero on Octeon; make sure it is
164	 * set in the emulation view of the FPU state.
165	 */
166	tf->sr |= SR_FR_32;
167#endif
168#endif	/* FPUEMUL */
169
170	/*
171	 * Get the faulting instruction.  This should not fail, and
172	 * if it does, it's probably not your lucky day.
173	 */
174
175	pc = (vaddr_t)tf->pc;
176	if (tf->cause & CR_BR_DELAY)
177		pc += 4;
178	if (copyin((void *)pc, &insn, sizeof insn) != 0) {
179		sig = SIGBUS;
180		fault_type = BUS_OBJERR;
181		goto deliver;
182	}
183	inst = *(InstFmt *)&insn;
184
185	/*
186	 * Emulate the instruction.
187	 */
188
189#ifdef DEBUG
190#ifdef DDB
191	printf("%s: unimplemented FPU completion, fsr 0x%08x\n%p: ",
192	    p->p_comm, fsr, pc);
193	dbmd_print_insn(insn, pc, printf);
194#else
195	printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
196	    p->p_comm, insn, fsr);
197#endif
198#endif
199
200	switch (inst.FRType.op) {
201	default:
202		/*
203		 * Not a FPU instruction.
204		 */
205		break;
206#ifdef FPUEMUL
207	case OP_SPECIAL:
208		switch (inst.FRType.func) {
209		default:
210			/*
211			 * Not a FPU instruction.
212			 */
213			break;
214		case OP_MOVCI:
215			/*
216			 * This instruction should not require emulation,
217			 * unless there is no FPU.
218			 */
219			emulate = 1;
220			break;
221		}
222		break;
223	case OP_LDC1:
224	case OP_LWC1:
225	case OP_SDC1:
226	case OP_SWC1:
227		/*
228		 * These instructions should not require emulation,
229		 * unless there is no FPU.
230		 */
231		emulate = 1;
232		break;
233#endif
234	case OP_COP1:
235		switch (inst.RType.rs) {
236		case OP_BC:
237#ifdef FPUEMUL
238			skip_insn = 0;
239#endif
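			/* FALLTHROUGH */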
240		case OP_MF:
241		case OP_DMF:
242		case OP_CF:
243		case OP_MT:
244		case OP_DMT:
245		case OP_CT:
246			/*
247			 * These instructions should not require emulation,
248			 * unless there is no FPU.
249			 */
250#ifdef FPUEMUL
251			emulate = 1;
252#endif
253			break;
254		default:
255			emulate = 1;
256			break;
257		}
258		break;
259	case OP_COP1X:
260		switch (inst.FQType.op4) {
261		default:
262			switch (inst.FRType.func) {
263#ifdef FPUEMUL
264			case OP_LDXC1:
265			case OP_LWXC1:
266			case OP_SDXC1:
267			case OP_SWXC1:
268			case OP_PREFX:
269				/*
270				 * These instructions should not require
271				 * emulation, unless there is no FPU.
272				 */
273				emulate = 1;
				break;
274#endif
275			default:
276				/*
277				 * Not a valid instruction.
278				 */
279				break;
280			}
281			break;
282		case OP_MADD:
283		case OP_MSUB:
284		case OP_NMADD:
285		case OP_NMSUB:
286			emulate = 1;
287			break;
288		}
289		break;
290	}
291
292	if (emulate) {
293#ifndef FPUEMUL
294		KASSERT(p == ci->ci_fpuproc);
295		save_fpu();
296#endif
297		update_pcb = 1;
298
299		sig = fpu_emulate(tf, insn, &sv);
300		/* reload fsr, possibly modified by softfloat code */
301		fsr = tf->fsr;
302		if (sig == 0) {
303			/* raise SIGFPE if necessary */
304			excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
305			excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
306			if (excbits != 0)
307				sig = SIGFPE;
308		}
309	} else {
310		sig = SIGILL;
311		fault_type = ILL_ILLOPC;
312	}
313
314deliver:
315	switch (sig) {
316	case SIGFPE:
317		excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
318		excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
319		if (excbits & FP_X_INV)
320			fault_type = FPE_FLTINV;
321		else if (excbits & FP_X_DZ)
322			fault_type = FPE_FLTDIV;
323		else if (excbits & FP_X_OFL)
324			fault_type = FPE_FLTOVF;
325		else if (excbits & FP_X_UFL)
326			fault_type = FPE_FLTUND;
327		else /* if (excbits & FP_X_IMP) */
328			fault_type = FPE_FLTRES;
329
330		break;
331#ifdef FPUEMUL
332	case SIGBUS:
333		fault_type = BUS_ADRALN;
334		break;
335	case SIGSEGV:
336		fault_type = SEGV_MAPERR;
337		break;
338#endif
339	}
340
341	/*
342	 * Skip the instruction, unless we are delivering SIGILL.
343	 */
344#ifdef FPUEMUL
345	if (skip_insn) {
346#endif
347		if (sig != SIGILL) {
348			if (tf->cause & CR_BR_DELAY) {
349				/*
350				 * Note that it doesn't matter, at this point,
351				 * that we pass the updated FSR value, as it is
352				 * only used to decide whether to branch or not
353				 * if the faulting instruction was BC1[FT].
354				 */
355				tf->pc = MipsEmulateBranch(tf, tf->pc, fsr, 0);
356			} else
357				tf->pc += 4;
358		}
359#ifdef FPUEMUL
360	}
361#endif
362
363	/*
364	 * Update the FPU status register.
365	 * We need to make sure that this will not cause an exception
366	 * in kernel mode.
367	 */
368
369	/* propagate raised exceptions to the sticky bits */
370	fsr &= ~FPCSR_C_E;
371	excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
372	fsr |= excbits << FPCSR_F_SHIFT;
373	/* clear all exception sources */
374	fsr &= ~FPCSR_C_MASK;
375	if (update_pcb)
376		tf->fsr = fsr;
377#ifndef FPUEMUL
378	__asm__ __volatile__ ("ctc1 %0, $31" :: "r" (fsr));
379	/* disable fpu before returning to trap() */
380	setsr(sr);
381#endif
382
383	if (sig != 0) {
384#ifdef FPUEMUL
385		if (sig != SIGBUS && sig != SIGSEGV)
386#endif
387			sv.sival_ptr = (void *)pc;
388		KERNEL_LOCK();
389		trapsignal(p, sig, 0, fault_type, sv);
390		KERNEL_UNLOCK();
391	}
392}
393
394/*
395 * Emulate an FPU instruction.  The FPU register set has been saved in the
396 * current PCB, and is pointed to by the trap frame.
397 */
398int
399fpu_emulate(struct trap_frame *tf, uint32_t insn, union sigval *sv)
400{
401	InstFmt inst;
402
403	tf->zero = 0;	/* not written by trap code */
404
405	inst = *(InstFmt *)&insn;
406	switch (inst.FRType.op) {
407	default:
408		break;
409#ifdef FPUEMUL
410	case OP_SPECIAL:
411		return nofpu_emulate_movci(tf, insn);
412	case OP_LDC1:
413	case OP_LWC1:
414	case OP_SDC1:
415	case OP_SWC1:
416		return nofpu_emulate_loadstore(tf, insn, sv);
417#endif
418	case OP_COP1:
419		switch (inst.RType.rs) {
420#ifdef FPUEMUL
421		case OP_MF:
422		case OP_DMF:
423		case OP_CF:
424		case OP_MT:
425		case OP_DMT:
426		case OP_CT:
427		case OP_BC:
428			return nofpu_emulate_cop1(tf, insn, sv);
429#endif
430		default:
431			return fpu_emulate_cop1(tf, insn);
432		}
433		break;
434	case OP_COP1X:
435		switch (inst.FQType.op4) {
436#ifdef FPUEMUL
437		default:
438			switch (inst.FRType.func) {
439			case OP_LDXC1:
440			case OP_LWXC1:
441			case OP_SDXC1:
442			case OP_SWXC1:
443			case OP_PREFX:
444				return nofpu_emulate_cop1x(tf, insn, sv);
445			default:
446				break;
447			}
448			break;
449		case OP_MADD:
450		case OP_MSUB:
451		case OP_NMADD:
452		case OP_NMSUB:
453			return fpu_emulate_cop1x(tf, insn);
454#else
455		default:
456			return fpu_emulate_cop1x(tf, insn);
457#endif
458		}
459	}
460
461	return SIGILL;
462}
463
464/*
465 * Emulate a COP1 FPU instruction.
466 */
467int
468fpu_emulate_cop1(struct trap_frame *tf, uint32_t insn)
469{
470	InstFmt inst;
471	uint ft, fs, fd;
472	fpu_fn3 *fpu_op;
473	static fpu_fn3 *const fpu_ops1[1 << 6] = {
474		fpu_add,		/* 0x00 */
475		fpu_sub,
476		fpu_mul,
477		fpu_div,
478		fpu_sqrt,
479		fpu_abs,
480		fpu_mov,
481		fpu_neg,
482		fpu_round_l,		/* 0x08 */
483		fpu_trunc_l,
484		fpu_ceil_l,
485		fpu_floor_l,
486		fpu_round_w,
487		fpu_trunc_w,
488		fpu_ceil_w,
489		fpu_floor_w,
490		NULL,			/* 0x10 */
491		fpu_movcf,
492		fpu_movz,
493		fpu_movn,
494		NULL,
495		fpu_recip,
496		fpu_rsqrt,
497		NULL,
498		NULL,			/* 0x18 */
499		NULL,
500		NULL,
501		NULL,
502		NULL,
503		NULL,
504		NULL,
505		NULL,
506		fpu_cvt_s,		/* 0x20 */
507		fpu_cvt_d,
508		NULL,
509		NULL,
510		fpu_cvt_w,
511		fpu_cvt_l,
512		NULL,
513		NULL,
514		NULL,			/* 0x28 */
515		NULL,
516		NULL,
517		NULL,
518		NULL,
519		NULL,
520		NULL,
521		NULL,
522		(fpu_fn3 *)fpu_c,	/* 0x30 */
523		(fpu_fn3 *)fpu_c,
524		(fpu_fn3 *)fpu_c,
525		(fpu_fn3 *)fpu_c,
526		(fpu_fn3 *)fpu_c,
527		(fpu_fn3 *)fpu_c,
528		(fpu_fn3 *)fpu_c,
529		(fpu_fn3 *)fpu_c,
530		(fpu_fn3 *)fpu_c,	/* 0x38 */
531		(fpu_fn3 *)fpu_c,
532		(fpu_fn3 *)fpu_c,
533		(fpu_fn3 *)fpu_c,
534		(fpu_fn3 *)fpu_c,
535		(fpu_fn3 *)fpu_c,
536		(fpu_fn3 *)fpu_c,
537		(fpu_fn3 *)fpu_c
538	};
539
540	inst = *(InstFmt *)&insn;
541
542	/*
543	 * Check for valid function code.
544	 */
545
546	fpu_op = fpu_ops1[inst.FRType.func];
547	if (fpu_op == NULL)
548		return SIGILL;
549
550	/*
551	 * Check for valid format.  FRType assumes bit 25 is always set,
552	 * so we need to check for it explicitely.
553	 */
554
555	if ((insn & (1 << 25)) == 0)
556		return SIGILL;
557	switch (inst.FRType.fmt) {
558	default:
559		return SIGILL;
560	case FMT_S:
561	case FMT_D:
562	case FMT_W:
563	case FMT_L:
564		break;
565	}
566
567	/*
568	 * Check for valid register values. Only even-numbered registers
569 * can be used if the FR bit is clear in the coprocessor 0 status
570	 * register.
571	 *
572	 * Note that c.cond does not specify a register number in the fd
573	 * field, but the fd field must have zero in its low two bits, so
574	 * the test will not reject valid c.cond instructions.
575	 */
576
577	ft = inst.FRType.ft;
578	fs = inst.FRType.fs;
579	fd = inst.FRType.fd;
580	if ((tf->sr & SR_FR_32) == 0) {
581		if ((ft | fs | fd) & 1)
582			return SIGILL;
583	}
584
585	/*
586	 * Finally dispatch to the proper routine.
587	 */
588
589	if (fpu_op == (fpu_fn3 *)&fpu_c)
590		return fpu_c(tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func);
591	else
592		return (*fpu_op)(tf, inst.FRType.fmt, ft, fs, fd);
593}
594
595/*
596 * Emulate a COP1X FPU instruction.
597 */
598int
599fpu_emulate_cop1x(struct trap_frame *tf, uint32_t insn)
600{
601	InstFmt inst;
602	uint fr, ft, fs, fd;
603	fpu_fn4 *fpu_op;
604	static fpu_fn4 *const fpu_ops1x[1 << 3] = {
605		NULL,
606		NULL,
607		NULL,
608		NULL,
609		fpu_madd,
610		fpu_msub,
611		fpu_nmadd,
612		fpu_nmsub
613	};
614
615	inst = *(InstFmt *)&insn;
616
617	/*
618	 * Check for valid function code.
619	 */
620
621	fpu_op = fpu_ops1x[inst.FQType.op4];
622	if (fpu_op == NULL)
623		return SIGILL;
624
625	/*
626	 * Check for valid format.
627	 */
628
629	switch (inst.FQType.fmt3) {
630	default:
631		return SIGILL;
632	case FMT_S:
633	case FMT_D:
634	case FMT_W:
635	case FMT_L:
636		break;
637	}
638
639	/*
640	 * Check for valid register values. Only even-numbered registers
641 * can be used if the FR bit is clear in the coprocessor 0 status
642	 * register.
643	 */
644
645	fr = inst.FQType.fr;
646	ft = inst.FQType.ft;
647	fs = inst.FQType.fs;
648	fd = inst.FQType.fd;
649	if ((tf->sr & SR_FR_32) == 0) {
650		if ((fr | ft | fs | fd) & 1)
651			return SIGILL;
652	}
653
654	/*
655	 * Finally dispatch to the proper routine.
656	 */
657
658	return (*fpu_op)(tf, inst.FQType.fmt3, fr, ft, fs, fd);
659}
660
661/*
662 * Load a floating-point argument according to the specified format.
663 */
664uint64_t
665fpu_load(struct trap_frame *tf, uint fmt, uint regno)
666{
667	register_t *regs = (register_t *)tf;
668	uint64_t tmp, tmp2;
669
670	tmp = (uint64_t)regs[FPBASE + regno];
671	if (tf->sr & SR_FR_32) {
672		switch (fmt) {
673		case FMT_D:
674		case FMT_L:
675			break;
676		case FMT_S:
677		case FMT_W:
678			tmp &= 0xffffffff;
679			break;
680		}
681	} else {
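		/*
		 * FR=0: the register file is viewed as 32 32-bit registers,
		 * and a 64-bit value spans an even/odd pair, the even
		 * register holding the low word.
		 */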
682		tmp &= 0xffffffff;
683		switch (fmt) {
684		case FMT_D:
685		case FMT_L:
686			/* caller has enforced regno is even */
687			tmp2 = (uint64_t)regs[FPBASE + regno + 1];
688			tmp |= tmp2 << 32;
689			break;
690		case FMT_S:
691		case FMT_W:
692			break;
693		}
694	}
695
696	return tmp;
697}
698
699/*
700 * Store a floating-point result according to the specified format.
701 */
702void
703fpu_store(struct trap_frame *tf, uint fmt, uint regno, uint64_t rslt)
704{
705	register_t *regs = (register_t *)tf;
706
707	if (tf->sr & SR_FR_32) {
708		regs[FPBASE + regno] = rslt;
709	} else {
710		/* caller has enforced regno is even */
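		/* even register receives the low word, odd register the high word */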
711		regs[FPBASE + regno] = rslt & 0xffffffff;
712		regs[FPBASE + regno + 1] = (rslt >> 32) & 0xffffffff;
713	}
714}
715
716/*
717 * Integer conversion
718 */
719
720int
721fpu_int_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
722{
723	uint64_t raw;
724	uint32_t oldrm;
725
726	if (ft != 0)
727		return SIGILL;
728	if (fmt != FMT_S && fmt != FMT_D)
729		return SIGILL;
730
731	raw = fpu_load(tf, fmt, fs);
732
733	/* round towards required mode */
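	/*
	 * (the MI softfloat code presumably picks the rounding mode up from
	 * the FCSR image in the trap frame, hence the temporary substitution)
	 */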
734	oldrm = tf->fsr & FPCSR_RM_MASK;
735	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
736	if (fmt == FMT_S)
737		raw = float32_to_int64((float32)raw);
738	else
739		raw = float64_to_int64((float64)raw);
740	/* restore rounding mode */
741	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
742
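	/*
	 * Do not overwrite the destination register if the conversion
	 * raised an invalid operation exception which the program has
	 * enabled.
	 */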
743	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
744		fpu_store(tf, fmt, fd, raw);
745
746	return 0;
747}
748
749int
750fpu_int_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
751{
752	uint64_t raw;
753	uint32_t oldrm;
754
755	if (ft != 0)
756		return SIGILL;
757	if (fmt != FMT_S && fmt != FMT_D)
758		return SIGILL;
759
760	raw = fpu_load(tf, fmt, fs);
761
762	/* round towards required mode */
763	oldrm = tf->fsr & FPCSR_RM_MASK;
764	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
765	if (fmt == FMT_S)
766		raw = float32_to_int32((float32)raw);
767	else
768		raw = float64_to_int32((float64)raw);
769	/* restore rounding mode */
770	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
771
772	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
773		fpu_store(tf, fmt, fd, raw);
774
775	return 0;
776}
777
778/*
779 * FPU Instruction emulation
780 */
781
782int
783fpu_abs(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
784{
785	uint64_t raw;
786
787	if (ft != 0)
788		return SIGILL;
789	if (fmt != FMT_S && fmt != FMT_D)
790		return SIGILL;
791
792	raw = fpu_load(tf, fmt, fs);
793	/* clear sign bit unless NaN */
794	if (fmt == FMT_S) {
795		float32 f32 = (float32)raw;
796		if (float32_is_nan(f32)) {
797			float_set_invalid();
798		} else {
799			f32 &= ~(1L << 31);
800			raw = (uint64_t)f32;
801		}
802	} else {
803		float64 f64 = (float64)raw;
804		if (float64_is_nan(f64)) {
805			float_set_invalid();
806		} else {
807			f64 &= ~(1L << 63);
808			raw = (uint64_t)f64;
809		}
810	}
811	fpu_store(tf, fmt, fd, raw);
812
813	return 0;
814}
815
816int
817fpu_add(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
818{
819	uint64_t raw1, raw2, rslt;
820
821	if (fmt != FMT_S && fmt != FMT_D)
822		return SIGILL;
823
824	raw1 = fpu_load(tf, fmt, fs);
825	raw2 = fpu_load(tf, fmt, ft);
826	if (fmt == FMT_S) {
827		float32 f32 = float32_add((float32)raw1, (float32)raw2);
828		rslt = (uint64_t)f32;
829	} else {
830		float64 f64 = float64_add((float64)raw1, (float64)raw2);
831		rslt = (uint64_t)f64;
832	}
833	fpu_store(tf, fmt, fd, rslt);
834
835	return 0;
836}
837
838int
839fpu_c(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint op)
840{
841	uint64_t raw1, raw2;
842	uint cc, lt, eq, uo;
843
844	if ((fd & 0x03) != 0)
845		return SIGILL;
846	if (fmt != FMT_S && fmt != FMT_D)
847		return SIGILL;
848
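	/*
	 * The low three bits of the comparison's condition select which
	 * predicates (unordered, equal, less-than) make it succeed, and
	 * bit 3 requests an invalid exception on unordered operands:
	 * e.g. c.olt tests lt only, c.ule tests lt, eq or uo, and c.lt
	 * behaves like c.olt but also signals on unordered operands.
	 */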
849	lt = eq = uo = 0;
850	cc = fd >> 2;
851
852	raw1 = fpu_load(tf, fmt, fs);
853	raw2 = fpu_load(tf, fmt, ft);
854
855	if (fmt == FMT_S) {
856		float32 f32a = (float32)raw1;
857		float32 f32b = (float32)raw2;
858		if (float32_is_nan(f32a)) {
859			uo = 1 << 0;
860			if (float32_is_signaling_nan(f32a))
861				op |= 0x08;	/* force invalid exception */
862		}
863		if (float32_is_nan(f32b)) {
864			uo = 1 << 0;
865			if (float32_is_signaling_nan(f32b))
866				op |= 0x08;	/* force invalid exception */
867		}
868		if (uo == 0) {
869			if (float32_eq(f32a, f32b))
870				eq = 1 << 1;
871			else if (float32_lt(f32a, f32b))
872				lt = 1 << 2;
873		}
874	} else {
875		float64 f64a = (float64)raw1;
876		float64 f64b = (float64)raw2;
877		if (float64_is_nan(f64a)) {
878			uo = 1 << 0;
879			if (float64_is_signaling_nan(f64a))
880				op |= 0x08;	/* force invalid exception */
881		}
882		if (float64_is_nan(f64b)) {
883			uo = 1 << 0;
884			if (float64_is_signaling_nan(f64b))
885				op |= 0x08;	/* force invalid exception */
886		}
887		if (uo == 0) {
888			if (float64_eq(f64a, f64b))
889				eq = 1 << 1;
890			else if (float64_lt(f64a, f64b))
891				lt = 1 << 2;
892		}
893	}
894
895	if (uo && (op & 0x08)) {
896		float_set_invalid();
897		if (tf->fsr & FPCSR_E_V) {
898			/* comparison result intentionally not written */
899			goto skip;
900		}
901	} else {
902		if ((uo | eq | lt) & op)
903			tf->fsr |= FPCSR_CONDVAL(cc);
904		else
905			tf->fsr &= ~FPCSR_CONDVAL(cc);
906	}
907skip:
908
909	return 0;
910}
911
912int
913fpu_ceil_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
914{
915	/* round towards positive infinity */
916	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RP);
917}
918
919int
920fpu_ceil_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
921{
922	/* round towards positive infinity */
923	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RP);
924}
925
926int
927fpu_cvt_d(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
928{
929	uint64_t raw;
930
931	if (ft != 0)
932		return SIGILL;
933	if (fmt == FMT_D)
934		return SIGILL;
935
936	raw = fpu_load(tf, fmt, fs);
937	switch (fmt) {
938	case FMT_L:
939		raw = int64_to_float64((int64_t)raw);
940		break;
941	case FMT_S:
942		raw = float32_to_float64((float32)raw);
943		break;
944	case FMT_W:
945		raw = int32_to_float64((int32_t)raw);
946		break;
947	}
948	fpu_store(tf, fmt, fd, raw);
949
950	return 0;
951}
952
953int
954fpu_cvt_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
955{
956	uint64_t raw;
957	uint32_t rm;
958
959	if (ft != 0)
960		return SIGILL;
961	if (fmt != FMT_S && fmt != FMT_D)
962		return SIGILL;
963
964	rm = tf->fsr & FPCSR_RM_MASK;
965	raw = fpu_load(tf, fmt, fs);
966	if (fmt == FMT_D) {
967		if (rm == FP_RZ)
968			raw = float64_to_int64_round_to_zero((float64)raw);
969		else
970			raw = float64_to_int64((float64)raw);
971	} else {
972		if (rm == FP_RZ)
973			raw = float32_to_int64_round_to_zero((float32)raw);
974		else
975			raw = float32_to_int64((float32)raw);
976	}
977	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
978		fpu_store(tf, fmt, fd, raw);
979
980	return 0;
981}
982
983int
984fpu_cvt_s(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
985{
986	uint64_t raw;
987
988	if (ft != 0)
989		return SIGILL;
990	if (fmt == FMT_S)
991		return SIGILL;
992
993	raw = fpu_load(tf, fmt, fs);
994	switch (fmt) {
995	case FMT_D:
996		raw = float64_to_float32((float64)raw);
997		break;
998	case FMT_L:
999		raw = int64_to_float32((int64_t)raw);
1000		break;
1001	case FMT_W:
1002		raw = int32_to_float32((int32_t)raw);
1003		break;
1004	}
1005	fpu_store(tf, fmt, fd, raw);
1006
1007	return 0;
1008}
1009
1010int
1011fpu_cvt_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1012{
1013	uint64_t raw;
1014	uint32_t rm;
1015
1016	if (ft != 0)
1017		return SIGILL;
1018	if (fmt != FMT_S && fmt != FMT_D)
1019		return SIGILL;
1020
1021	rm = tf->fsr & FPCSR_RM_MASK;
1022	raw = fpu_load(tf, fmt, fs);
1023	if (fmt == FMT_D) {
1024		if (rm == FP_RZ)
1025			raw = float64_to_int32_round_to_zero((float64)raw);
1026		else
1027			raw = float64_to_int32((float64)raw);
1028	} else {
1029		if (rm == FP_RZ)
1030			raw = float32_to_int32_round_to_zero((float32)raw);
1031		else
1032			raw = float32_to_int32((float32)raw);
1033	}
1034	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1035		fpu_store(tf, fmt, fd, raw);
1036
1037	return 0;
1038}
1039
1040int
1041fpu_div(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1042{
1043	uint64_t raw1, raw2, rslt;
1044
1045	if (fmt != FMT_S && fmt != FMT_D)
1046		return SIGILL;
1047
1048	raw1 = fpu_load(tf, fmt, fs);
1049	raw2 = fpu_load(tf, fmt, ft);
1050	if (fmt == FMT_S) {
1051		float32 f32 = float32_div((float32)raw1, (float32)raw2);
1052		rslt = (uint64_t)f32;
1053	} else {
1054		float64 f64 = float64_div((float64)raw1, (float64)raw2);
1055		rslt = (uint64_t)f64;
1056	}
1057	fpu_store(tf, fmt, fd, rslt);
1058
1059	return 0;
1060}
1061
1062int
1063fpu_floor_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1064{
1065	/* round towards negative infinity */
1066	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RM);
1067}
1068
1069int
1070fpu_floor_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1071{
1072	/* round towards negative infinity */
1073	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RM);
1074}
1075
1076int
1077fpu_madd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1078{
1079	uint64_t raw1, raw2, raw3, rslt;
1080
1081	if (fmt != FMT_S && fmt != FMT_D)
1082		return SIGILL;
1083
1084	raw1 = fpu_load(tf, fmt, fs);
1085	raw2 = fpu_load(tf, fmt, ft);
1086	raw3 = fpu_load(tf, fmt, fr);
1087	if (fmt == FMT_S) {
1088		float32 f32 = float32_add(
1089		    float32_mul((float32)raw1, (float32)raw2),
1090		    (float32)raw3);
1091		rslt = (uint64_t)f32;
1092	} else {
1093		float64 f64 = float64_add(
1094		    float64_mul((float64)raw1, (float64)raw2),
1095		    (float64)raw3);
1096		rslt = (uint64_t)f64;
1097	}
1098	fpu_store(tf, fmt, fd, rslt);
1099
1100	return 0;
1101}
1102
1103int
1104fpu_mov(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1105{
1106	uint64_t raw;
1107
1108	if (ft != 0)
1109		return SIGILL;
1110	if (fmt != FMT_S && fmt != FMT_D)
1111		return SIGILL;
1112
1113	raw = fpu_load(tf, fmt, fs);
1114	fpu_store(tf, fmt, fd, raw);
1115
1116	return 0;
1117}
1118
1119int
1120fpu_movcf(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1121{
1122	uint64_t raw;
1123	uint cc, istf;
1124	int condition;
1125
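	/*
	 * The ft field of movcf holds the condition code number in its
	 * upper bits and the true/false sense in bit 0; bit 1 must be zero.
	 */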
1126	if ((ft & 0x02) != 0)
1127		return SIGILL;
1128	cc = ft >> 2;
1129	if (fmt != FMT_S && fmt != FMT_D)
1130		return SIGILL;
1131
1132	condition = tf->fsr & FPCSR_CONDVAL(cc);
1133	istf = ft & COPz_BC_TF_MASK;
1134	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1135		raw = fpu_load(tf, fmt, fs);
1136		fpu_store(tf, fmt, fd, raw);
1137	}
1138
1139	return 0;
1140}
1141
1142int
1143fpu_movn(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1144{
1145	register_t *regs = (register_t *)tf;
1146	uint64_t raw;
1147
1148	if (fmt != FMT_S && fmt != FMT_D)
1149		return SIGILL;
1150
1151	if (ft != ZERO && regs[ft] != 0) {
1152		raw = fpu_load(tf, fmt, fs);
1153		fpu_store(tf, fmt, fd, raw);
1154	}
1155
1156	return 0;
1157}
1158
1159int
1160fpu_movz(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1161{
1162	register_t *regs = (register_t *)tf;
1163	uint64_t raw;
1164
1165	if (fmt != FMT_S && fmt != FMT_D)
1166		return SIGILL;
1167
1168	if (ft == ZERO || regs[ft] == 0) {
1169		raw = fpu_load(tf, fmt, fs);
1170		fpu_store(tf, fmt, fd, raw);
1171	}
1172
1173	return 0;
1174}
1175
1176int
1177fpu_msub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1178{
1179	uint64_t raw1, raw2, raw3, rslt;
1180
1181	if (fmt != FMT_S && fmt != FMT_D)
1182		return SIGILL;
1183
1184	raw1 = fpu_load(tf, fmt, fs);
1185	raw2 = fpu_load(tf, fmt, ft);
1186	raw3 = fpu_load(tf, fmt, fr);
1187	if (fmt == FMT_S) {
1188		float32 f32 = float32_sub(
1189		    float32_mul((float32)raw1, (float32)raw2),
1190		    (float32)raw3);
1191		rslt = (uint64_t)f32;
1192	} else {
1193		float64 f64 = float64_sub(
1194		    float64_mul((float64)raw1, (float64)raw2),
1195		    (float64)raw3);
1196		rslt = (uint64_t)f64;
1197	}
1198	fpu_store(tf, fmt, fd, rslt);
1199
1200	return 0;
1201}
1202
1203int
1204fpu_mul(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1205{
1206	uint64_t raw1, raw2, rslt;
1207
1208	if (fmt != FMT_S && fmt != FMT_D)
1209		return SIGILL;
1210
1211	raw1 = fpu_load(tf, fmt, fs);
1212	raw2 = fpu_load(tf, fmt, ft);
1213	if (fmt == FMT_S) {
1214		float32 f32 = float32_mul((float32)raw1, (float32)raw2);
1215		rslt = (uint64_t)f32;
1216	} else {
1217		float64 f64 = float64_mul((float64)raw1, (float64)raw2);
1218		rslt = (uint64_t)f64;
1219	}
1220	fpu_store(tf, fmt, fd, rslt);
1221
1222	return 0;
1223}
1224
1225int
1226fpu_neg(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1227{
1228	uint64_t raw;
1229
1230	if (ft != 0)
1231		return SIGILL;
1232	if (fmt != FMT_S && fmt != FMT_D)
1233		return SIGILL;
1234
1235	raw = fpu_load(tf, fmt, fs);
1236	/* flip sign bit unless NaN */
1237	if (fmt == FMT_S) {
1238		float32 f32 = (float32)raw;
1239		if (float32_is_nan(f32)) {
1240			float_set_invalid();
1241		} else {
1242			f32 ^= 1L << 31;
1243			raw = (uint64_t)f32;
1244		}
1245	} else {
1246		float64 f64 = (float64)raw;
1247		if (float64_is_nan(f64)) {
1248			float_set_invalid();
1249		} else {
1250			f64 ^= 1L << 63;
1251			raw = (uint64_t)f64;
1252		}
1253	}
1254	fpu_store(tf, fmt, fd, raw);
1255
1256	return 0;
1257}
1258
1259int
1260fpu_nmadd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1261{
1262	uint64_t raw1, raw2, raw3, rslt;
1263
1264	if (fmt != FMT_S && fmt != FMT_D)
1265		return SIGILL;
1266
1267	raw1 = fpu_load(tf, fmt, fs);
1268	raw2 = fpu_load(tf, fmt, ft);
1269	raw3 = fpu_load(tf, fmt, fr);
1270	if (fmt == FMT_S) {
1271		float32 f32 = float32_add(
1272		    float32_mul((float32)raw1, (float32)raw2),
1273		    (float32)raw3);
1274		if (float32_is_nan(f32))
1275			float_set_invalid();
1276		else
1277			f32 ^= 1L << 31;
1278		rslt = (uint64_t)f32;
1279	} else {
1280		float64 f64 = float64_add(
1281		    float64_mul((float64)raw1, (float64)raw2),
1282		    (float64)raw3);
1283		if (float64_is_nan(f64))
1284			float_set_invalid();
1285		else
1286			f64 ^= 1L << 63;
1287		rslt = (uint64_t)f64;
1288	}
1289	fpu_store(tf, fmt, fd, rslt);
1290
1291	return 0;
1292}
1293
1294int
1295fpu_nmsub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1296{
1297	uint64_t raw1, raw2, raw3, rslt;
1298
1299	if (fmt != FMT_S && fmt != FMT_D)
1300		return SIGILL;
1301
1302	raw1 = fpu_load(tf, fmt, fs);
1303	raw2 = fpu_load(tf, fmt, ft);
1304	raw3 = fpu_load(tf, fmt, fr);
1305	if (fmt == FMT_S) {
1306		float32 f32 = float32_sub(
1307		    float32_mul((float32)raw1, (float32)raw2),
1308		    (float32)raw3);
1309		if (float32_is_nan(f32))
1310			float_set_invalid();
1311		else
1312			f32 ^= 1L << 31;
1313		rslt = (uint64_t)f32;
1314	} else {
1315		float64 f64 = float64_sub(
1316		    float64_mul((float64)raw1, (float64)raw2),
1317		    (float64)raw3);
1318		if (float64_is_nan(f64))
1319			float_set_invalid();
1320		else
1321			f64 ^= 1L << 63;
1322		rslt = (uint64_t)f64;
1323	}
1324	fpu_store(tf, fmt, fd, rslt);
1325
1326	return 0;
1327}
1328
1329int
1330fpu_recip(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1331{
1332	uint64_t raw;
1333
1334	if (ft != 0)
1335		return SIGILL;
1336	if (fmt != FMT_S && fmt != FMT_D)
1337		return SIGILL;
1338
1339	raw = fpu_load(tf, fmt, fs);
1340	if (fmt == FMT_S) {
1341		float32 f32 = float32_div(ONE_F32, (float32)raw);
1342		raw = (uint64_t)f32;
1343	} else {
1344		float64 f64 = float64_div(ONE_F64, (float64)raw);
1345		raw = (uint64_t)f64;
1346	}
1347	fpu_store(tf, fmt, fd, raw);
1348
1349	return 0;
1350}
1351
1352int
1353fpu_round_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1354{
1355	/* round towards nearest */
1356	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RN);
1357}
1358
1359int
1360fpu_round_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1361{
1362	/* round towards nearest */
1363	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RN);
1364}
1365
1366int
1367fpu_rsqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1368{
1369	uint64_t raw;
1370
1371	if (ft != 0)
1372		return SIGILL;
1373	if (fmt != FMT_S && fmt != FMT_D)
1374		return SIGILL;
1375
1376	raw = fpu_load(tf, fmt, fs);
1377	if (fmt == FMT_S) {
1378		float32 f32 = float32_sqrt((float32)raw);
1379		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1380		    (FPCSR_C_V | FPCSR_E_V))
1381			f32 = float32_div(ONE_F32, f32);
1382		raw = (uint64_t)f32;
1383	} else {
1384		float64 f64 = float64_sqrt((float64)raw);
1385		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1386		    (FPCSR_C_V | FPCSR_E_V))
1387			f64 = float64_div(ONE_F64, f64);
1388		raw = (uint64_t)f64;
1389	}
1390	fpu_store(tf, fmt, fd, raw);
1391
1392	return 0;
1393}
1394
1395int
1396fpu_sqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1397{
1398	uint64_t raw;
1399
1400	if (ft != 0)
1401		return SIGILL;
1402	if (fmt != FMT_S && fmt != FMT_D)
1403		return SIGILL;
1404
1405	raw = fpu_load(tf, fmt, fs);
1406	if (fmt == FMT_S) {
1407		float32 f32 = float32_sqrt((float32)raw);
1408		raw = (uint64_t)f32;
1409	} else {
1410		float64 f64 = float64_sqrt((float64)raw);
1411		raw = (uint64_t)f64;
1412	}
1413	fpu_store(tf, fmt, fd, raw);
1414
1415	return 0;
1416}
1417
1418int
1419fpu_sub(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1420{
1421	uint64_t raw1, raw2, rslt;
1422
1423	if (fmt != FMT_S && fmt != FMT_D)
1424		return SIGILL;
1425
1426	raw1 = fpu_load(tf, fmt, fs);
1427	raw2 = fpu_load(tf, fmt, ft);
1428	if (fmt == FMT_S) {
1429		float32 f32 = float32_sub((float32)raw1, (float32)raw2);
1430		rslt = (uint64_t)f32;
1431	} else {
1432		float64 f64 = float64_sub((float64)raw1, (float64)raw2);
1433		rslt = (uint64_t)f64;
1434	}
1435	fpu_store(tf, fmt, fd, rslt);
1436
1437	return 0;
1438}
1439
1440int
1441fpu_trunc_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1442{
1443	/* round towards zero */
1444	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RZ);
1445}
1446
1447int
1448fpu_trunc_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1449{
1450	/* round towards zero */
1451	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RZ);
1452}
1453
1454#ifdef FPUEMUL
1455
1456/*
1457 * Emulate a COP1 non-FPU instruction.
1458 */
1459int
1460nofpu_emulate_cop1(struct trap_frame *tf, uint32_t insn, union sigval *sv)
1461{
1462	register_t *regs = (register_t *)tf;
1463	InstFmt inst;
1464	int32_t cval;
1465
1466	inst = *(InstFmt *)&insn;
1467
1468	switch (inst.RType.rs) {
1469	case OP_MF:
1470		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1471			return SIGILL;
1472		if (inst.FRType.ft != ZERO)
1473			regs[inst.FRType.ft] =
1474			    (int32_t)regs[FPBASE + inst.FRType.fs];
1475		break;
1476	case OP_DMF:
1477		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1478			return SIGILL;
1479		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1480			if (inst.FRType.ft != ZERO)
1481				regs[inst.FRType.ft] =
1482				    fpu_load(tf, FMT_L, inst.FRType.fs);
1483		}
1484		break;
1485	case OP_CF:
1486		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1487			return SIGILL;
1488		if (inst.FRType.ft != ZERO) {
1489			switch (inst.FRType.fs) {
1490			case 0:	/* FPC_ID */
1491				cval = MIPS_SOFT << 8;
1492				break;
1493			case 31: /* FPC_CSR */
1494				cval = (int32_t)tf->fsr;
1495				break;
1496			default:
1497				cval = 0;
1498				break;
1499			}
1500			regs[inst.FRType.ft] = (int64_t)cval;
1501		}
1502		break;
1503	case OP_MT:
1504		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1505			return SIGILL;
1506		regs[FPBASE + inst.FRType.fs] = (int32_t)regs[inst.FRType.ft];
1507		break;
1508	case OP_DMT:
1509		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1510			return SIGILL;
1511		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1512			fpu_store(tf, FMT_L, inst.FRType.fs,
1513			    regs[inst.FRType.ft]);
1514		}
1515		break;
1516	case OP_CT:
1517		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1518			return SIGILL;
1519		cval = (int32_t)regs[inst.FRType.ft];
1520		switch (inst.FRType.fs) {
1521		case 31: /* FPC_CSR */
1522			cval &= ~FPCSR_C_E;
1523			tf->fsr = cval;
1524			break;
1525		case 0:	/* FPC_ID */
1526		default:
1527			break;
1528		}
1529		break;
1530	case OP_BC:
1531	   {
1532		uint cc, nd, istf;
1533		int condition;
1534		vaddr_t dest;
1535		uint32_t dinsn;
1536
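		/*
		 * The rt field of bc1 instructions holds the condition code
		 * number in its upper bits, the `branch likely' flag (which
		 * nullifies the delay slot on a branch not taken) in bit 1,
		 * and the true/false sense in bit 0.
		 */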
1537		cc = (inst.RType.rt & COPz_BC_CC_MASK) >> COPz_BC_CC_SHIFT;
1538		nd = inst.RType.rt & COPz_BCL_TF_MASK;
1539		istf = inst.RType.rt & COPz_BC_TF_MASK;
1540		condition = tf->fsr & FPCSR_CONDVAL(cc);
1541		if ((!condition && !istf) /*bc1f*/ ||
1542		    (condition && istf) /*bc1t*/) {
1543			/*
1544			 * Branch taken: if the delay slot is not a nop,
1545			 * copy the delay slot instruction to the dedicated
1546			 * relocation page, so that the cpu can execute it
1547			 * and hand control back to the kernel, which will
1548			 * then redirect execution to the branch
1549			 * destination.
1550			 */
1551			/* inline MipsEmulateBranch(tf, tf->pc, tf->fsr, insn)*/
1552			dest = tf->pc + 4 + ((short)inst.IType.imm << 2);
1553			if (copyin((const void *)(tf->pc + 4), &dinsn,
1554			    sizeof dinsn)) {
1555				sv->sival_ptr = (void *)(tf->pc + 4);
1556				return SIGSEGV;
1557			}
1558			if (dinsn == 0x00000000 /* nop */ ||
1559			    dinsn == 0x00000040 /* ssnop */) {
1560				tf->pc = dest;
1561			} else {
1562				if (fpe_branch_emulate(curproc, tf, dinsn,
1563				    dest) != 0)
1564					return SIGILL;
1565			}
1566		} else {
1567			/*
1568			 * Branch not taken: skip the instruction, and
1569			 * skip the delay slot if it was a `branch likely'
1570			 * instruction.
1571			 */
1572			tf->pc += 4;
1573			if (nd)
1574				tf->pc += 4;
1575		}
1576	    }
1577		break;
1578	}
1579
1580	return 0;
1581}
1582
1583/*
1584 * Emulate a COP1X non-FPU instruction.
1585 */
1586int
1587nofpu_emulate_cop1x(struct trap_frame *tf, uint32_t insn, union sigval *sv)
1588{
1589	register_t *regs = (register_t *)tf;
1590	InstFmt inst;
1591	vaddr_t va;
1592	uint64_t ddata;
1593	uint32_t wdata;
1594
1595	inst = *(InstFmt *)&insn;
1596	switch (inst.FRType.func) {
1597	case OP_LDXC1:
1598		if (inst.FQType.fs != 0)
1599			return SIGILL;
1600		va = (vaddr_t)regs[inst.FQType.fr] +
1601		    (vaddr_t)regs[inst.FQType.ft];
1602		if ((va & 0x07) != 0) {
1603			sv->sival_ptr = (void *)va;
1604			return SIGBUS;
1605		}
1606		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1607			sv->sival_ptr = (void *)va;
1608			return SIGSEGV;
1609		}
1610		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fd & 1) == 0)
1611			fpu_store(tf, FMT_L, inst.FQType.fd, ddata);
1612		break;
1613	case OP_LWXC1:
1614		if (inst.FQType.fs != 0)
1615			return SIGILL;
1616		va = (vaddr_t)regs[inst.FQType.fr] +
1617		    (vaddr_t)regs[inst.FQType.ft];
1618		if ((va & 0x03) != 0) {
1619			sv->sival_ptr = (void *)va;
1620			return SIGBUS;
1621		}
1625		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1626			sv->sival_ptr = (void *)va;
1627			return SIGSEGV;
1628		}
1629		regs[FPBASE + inst.FQType.fd] = wdata;
1630		break;
1631	case OP_SDXC1:
1632		if (inst.FQType.fd != 0)
1633			return SIGILL;
1634		va = (vaddr_t)regs[inst.FQType.fr] +
1635		    (vaddr_t)regs[inst.FQType.ft];
1636		if ((va & 0x07) != 0) {
1637			sv->sival_ptr = (void *)va;
1638			return SIGBUS;
1639		}
1640		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fs & 1) == 0)
1641			ddata = fpu_load(tf, FMT_L, inst.FQType.fs);
1642		else {
1643			/* undefined behaviour, don't expose stack content */
1644			ddata = 0;
1645		}
1646		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1647			sv->sival_ptr = (void *)va;
1648			return SIGSEGV;
1649		}
1650		break;
1651	case OP_SWXC1:
1652		if (inst.FQType.fd != 0)
1653			return SIGILL;
1654		va = (vaddr_t)regs[inst.FQType.fr] +
1655		    (vaddr_t)regs[inst.FQType.ft];
1656		if ((va & 0x03) != 0) {
1657			sv->sival_ptr = (void *)va;
1658			return SIGBUS;
1659		}
1663		wdata = regs[FPBASE + inst.FQType.fs];
1664		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1665			sv->sival_ptr = (void *)va;
1666			return SIGSEGV;
1667		}
1668		break;
1669	case OP_PREFX:
1670		/* nothing to do */
1671		break;
1672	}
1673
1674	return 0;
1675}
1676
1677/*
1678 * Emulate a load/store instruction on FPU registers.
1679 */
1680int
1681nofpu_emulate_loadstore(struct trap_frame *tf, uint32_t insn, union sigval *sv)
1682{
1683	register_t *regs = (register_t *)tf;
1684	InstFmt inst;
1685	vaddr_t va;
1686	uint64_t ddata;
1687	uint32_t wdata;
1688
1689	inst = *(InstFmt *)&insn;
1690	switch (inst.IType.op) {
1691	case OP_LDC1:
1692		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1693		if ((va & 0x07) != 0) {
1694			sv->sival_ptr = (void *)va;
1695			return SIGBUS;
1696		}
1697		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1698			sv->sival_ptr = (void *)va;
1699			return SIGSEGV;
1700		}
1701		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1702			fpu_store(tf, FMT_L, inst.IType.rt, ddata);
1703		break;
1704	case OP_LWC1:
1705		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1706		if ((va & 0x03) != 0) {
1707			sv->sival_ptr = (void *)va;
1708			return SIGBUS;
1709		}
1713		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1714			sv->sival_ptr = (void *)va;
1715			return SIGSEGV;
1716		}
1717		regs[FPBASE + inst.IType.rt] = wdata;
1718		break;
1719	case OP_SDC1:
1720		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1721		if ((va & 0x07) != 0) {
1722			sv->sival_ptr = (void *)va;
1723			return SIGBUS;
1724		}
1725		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1726			ddata = fpu_load(tf, FMT_L, inst.IType.rt);
1727		else {
1728			/* undefined behaviour, don't expose stack content */
1729			ddata = 0;
1730		}
1731		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1732			sv->sival_ptr = (void *)va;
1733			return SIGSEGV;
1734		}
1735		break;
1736	case OP_SWC1:
1737		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1738		if ((va & 0x03) != 0) {
1739			sv->sival_ptr = (void *)va;
1740			return SIGBUS;
1741		}
1745		wdata = regs[FPBASE + inst.IType.rt];
1746		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1747			sv->sival_ptr = (void *)va;
1748			return SIGSEGV;
1749		}
1750		break;
1751	}
1752
1753	return 0;
1754}
1755
1756/*
1757 * Emulate MOVF and MOVT.
1758 */
1759int
1760nofpu_emulate_movci(struct trap_frame *tf, uint32_t insn)
1761{
1762	register_t *regs = (register_t *)tf;
1763	InstFmt inst;
1764	uint cc, istf;
1765	int condition;
1766
1767	inst = *(InstFmt *)&insn;
1768	if ((inst.RType.rt & 0x02) != 0 || inst.RType.shamt != 0)
1769		return SIGILL;
1770
1771	cc = inst.RType.rt >> 2;
1772	istf = inst.RType.rt & COPz_BC_TF_MASK;
1773	condition = tf->fsr & FPCSR_CONDVAL(cc);
1774	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1775		if (inst.RType.rd != ZERO)
1776			regs[inst.RType.rd] = regs[inst.RType.rs];
1777	}
1778
1779	return 0;
1780}
1781
1782#endif	/* FPUEMUL */
1783