fp_emulate.c revision 1.5
1/*	$OpenBSD: fp_emulate.c,v 1.5 2011/07/07 18:11:24 art Exp $	*/
2
3/*
4 * Copyright (c) 2010 Miodrag Vallat.
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19/*
20 * Floating Point completion/emulation code (MI softfloat code control engine).
21 *
22 * Supports all MIPS IV COP1 and COP1X floating-point instructions.
23 */
24
25#include <sys/param.h>
26#include <sys/systm.h>
27#include <sys/kernel.h>
28#include <sys/signalvar.h>
29
30#include <machine/cpu.h>
31#include <machine/fpu.h>
32#include <machine/frame.h>
33#include <machine/ieee.h>
34#include <machine/ieeefp.h>
35#include <machine/mips_opcode.h>
36#include <machine/regnum.h>
37
38#include <lib/libkern/softfloat.h>
39#if defined(DEBUG) && defined(DDB)
40#include <machine/db_machdep.h>
41#endif
42
/*
 * Instruction dispatchers: fpu_emulate() decodes the major opcode and
 * hands the instruction to the COP1 or COP1X specific routine.
 */
int	fpu_emulate(struct trap_frame *, uint32_t, union sigval *);
int	fpu_emulate_cop1(struct trap_frame *, uint32_t);
int	fpu_emulate_cop1x(struct trap_frame *, uint32_t);
/*
 * Accessors for the floating-point registers saved in the trap frame;
 * arguments are (frame, operand format, register number[, value]).
 */
uint64_t
	fpu_load(struct trap_frame *, uint, uint);
void	fpu_store(struct trap_frame *, uint, uint, uint64_t);
#ifdef FPUEMUL
/*
 * Handlers for the instructions which only need emulation when there is
 * no FPU at all (moves, loads/stores, branches on condition).
 */
int	nofpu_emulate_cop1(struct trap_frame *, uint32_t, union sigval *);
int	nofpu_emulate_cop1x(struct trap_frame *, uint32_t, union sigval *);
int	nofpu_emulate_loadstore(struct trap_frame *, uint32_t, union sigval *);
int	nofpu_emulate_movci(struct trap_frame *, uint32_t);
#endif
55
/*
 * Handler signatures.
 * fpu_fn3: (frame, fmt, ft, fs, fd) - COP1 operations.
 * fpu_fn4: (frame, fmt, fr, ft, fs, fd) - COP1X multiply-add operations.
 * Handlers return 0 on success or the signal number to deliver.
 */
typedef	int (fpu_fn3)(struct trap_frame *, uint, uint, uint, uint);
typedef	int (fpu_fn4)(struct trap_frame *, uint, uint, uint, uint, uint);
fpu_fn3	fpu_abs;
fpu_fn3	fpu_add;
/* fpu_c takes the function code as an extra trailing argument */
int	fpu_c(struct trap_frame *, uint, uint, uint, uint, uint);
fpu_fn3	fpu_ceil_l;
fpu_fn3	fpu_ceil_w;
fpu_fn3	fpu_cvt_d;
fpu_fn3	fpu_cvt_l;
fpu_fn3	fpu_cvt_s;
fpu_fn3	fpu_cvt_w;
fpu_fn3	fpu_div;
fpu_fn3	fpu_floor_l;
fpu_fn3	fpu_floor_w;
fpu_fn4	fpu_madd;
fpu_fn4	fpu_msub;
fpu_fn3	fpu_mov;
fpu_fn3	fpu_movcf;
fpu_fn3	fpu_movn;
fpu_fn3	fpu_movz;
fpu_fn3	fpu_mul;
fpu_fn3	fpu_neg;
fpu_fn4	fpu_nmadd;
fpu_fn4	fpu_nmsub;
fpu_fn3	fpu_recip;
fpu_fn3	fpu_round_l;
fpu_fn3	fpu_round_w;
fpu_fn3	fpu_rsqrt;
fpu_fn3	fpu_sqrt;
fpu_fn3	fpu_sub;
fpu_fn3	fpu_trunc_l;
fpu_fn3	fpu_trunc_w;

/*
 * Common float-to-integer conversion helpers; the trailing argument is
 * the rounding mode to use for the conversion.
 */
int	fpu_int_l(struct trap_frame *, uint, uint, uint, uint, uint);
int	fpu_int_w(struct trap_frame *, uint, uint, uint, uint, uint);
91
/*
 * Encoding of operand format within opcodes `fmt' and `fmt3' fields.
 * Within this file, S and D select single and double precision
 * floating-point operands, W and L select 32-bit and 64-bit integer
 * operands.
 */
#define	FMT_S	0x00
#define	FMT_D	0x01
#define	FMT_W	0x04
#define	FMT_L	0x05
99
/*
 * Inlines from softfloat-specialize.h which are not made public, needed
 * by the handlers which have to recognize NaN operands themselves
 * (fpu_abs, fpu_neg, fpu_c, fpu_nmadd, fpu_nmsub).
 *
 * Both macros take the raw 32-bit encoding of a single-precision value.
 * The argument is fully parenthesized so that expressions can be passed
 * safely; note that float32_is_signaling_nan() evaluates it twice.
 */
#define	float32_is_nan(a) \
	(0xff000000 < ((a) << 1))
#define	float32_is_signaling_nan(a) \
	(((((a) >> 22) & 0x1ff) == 0x1fe) && ((a) & 0x003fffff))
108
/*
 * Precomputed results of intXX_to_floatXX(1): a zero fraction with the
 * exponent field set to the bias.  The expansions are wrapped in
 * parentheses so that they behave as a single operand wherever used.
 */
#define	ONE_F32	((float32)(SNG_EXP_BIAS << SNG_FRACBITS))
#define	ONE_F64	((float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS))
114
115/*
116 * Handle a floating-point exception.
117 */
118void
119MipsFPTrap(struct trap_frame *tf)
120{
121	struct cpu_info *ci = curcpu();
122	struct proc *p = ci->ci_curproc;
123	union sigval sv;
124	vaddr_t pc;
125	uint32_t fsr, excbits;
126	uint32_t insn;
127	InstFmt inst;
128	int sig = 0;
129	int fault_type = SI_NOINFO;
130	int update_pcb = 0;
131	int emulate = 0;
132#ifdef FPUEMUL
133	int skip_insn = 1;
134#else
135	uint32_t sr;
136#endif
137
138	KDASSERT(tf == p->p_md.md_regs);
139
140#ifndef FPUEMUL
141	/*
142	 * Enable FPU, and read its status register.
143	 */
144
145	sr = getsr();
146	setsr(sr | SR_COP_1_BIT);
147
148	__asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr));
149	__asm__ __volatile__ ("cfc1 %0, $31" : "=r" (fsr));
150
151	/*
152	 * If this is not an unimplemented operation, but a genuine
153	 * FPU exception, signal the process.
154	 */
155
156	if ((fsr & FPCSR_C_E) == 0) {
157		sig = SIGFPE;
158		goto deliver;
159	}
160#else
161#ifdef CPU_OCTEON
162	/*
163	 * SR_FR_32 is hardwired to zero on Octeon; make sure it is
164	 * set in the emulation view of the FPU state.
165	 */
166	tf->sr |= SR_FR_32;
167#endif
168#endif	/* FPUEMUL */
169
170	/*
171	 * Get the faulting instruction.  This should not fail, and
172	 * if it does, it's probably not your lucky day.
173	 */
174
175	pc = (vaddr_t)tf->pc;
176	if (tf->cause & CR_BR_DELAY)
177		pc += 4;
178	if (copyin((void *)pc, &insn, sizeof insn) != 0) {
179		sig = SIGBUS;
180		fault_type = BUS_OBJERR;
181		goto deliver;
182	}
183	inst = *(InstFmt *)&insn;
184
185	/*
186	 * Emulate the instruction.
187	 */
188
189#ifdef DEBUG
190#ifdef DDB
191	printf("%s: unimplemented FPU completion, fsr 0x%08x\n%p: ",
192	    p->p_comm, fsr, pc);
193	dbmd_print_insn(insn, pc, printf);
194#else
195	printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
196	    p->p_comm, insn, fsr);
197#endif
198#endif
199
200	switch (inst.FRType.op) {
201	default:
202		/*
203		 * Not a FPU instruction.
204		 */
205		break;
206#ifdef FPUEMUL
207	case OP_SPECIAL:
208		switch (inst.FRType.func) {
209		default:
210			/*
211			 * Not a FPU instruction.
212			 */
213			break;
214		case OP_MOVCI:
215			/*
216			 * This instruction should not require emulation,
217			 * unless there is no FPU.
218			 */
219			emulate = 1;
220			break;
221		}
222		break;
223	case OP_LDC1:
224	case OP_LWC1:
225	case OP_SDC1:
226	case OP_SWC1:
227		/*
228		 * These instructions should not require emulation,
229		 * unless there is no FPU.
230		 */
231		emulate = 1;
232		break;
233#endif
234	case OP_COP1:
235		switch (inst.RType.rs) {
236		case OP_BC:
237#ifdef FPUEMUL
238			skip_insn = 0;
239#endif
240		case OP_MF:
241		case OP_DMF:
242		case OP_CF:
243		case OP_MT:
244		case OP_DMT:
245		case OP_CT:
246			/*
247			 * These instructions should not require emulation,
248			 * unless there is no FPU.
249			 */
250#ifdef FPUEMUL
251			emulate = 1;
252#endif
253			break;
254		default:
255			emulate = 1;
256			break;
257		}
258		break;
259	case OP_COP1X:
260		switch (inst.FQType.op4) {
261		default:
262			switch (inst.FRType.func) {
263#ifdef FPUEMUL
264			case OP_LDXC1:
265			case OP_LWXC1:
266			case OP_SDXC1:
267			case OP_SWXC1:
268			case OP_PREFX:
269				/*
270				 * These instructions should not require
271				 * emulation, unless there is no FPU.
272				 */
273				emulate = 1;
274#endif
275			default:
276				/*
277				 * Not a valid instruction.
278				 */
279				break;
280			}
281			break;
282		case OP_MADD:
283		case OP_MSUB:
284		case OP_NMADD:
285		case OP_NMSUB:
286			emulate = 1;
287			break;
288		}
289		break;
290	}
291
292	if (emulate) {
293#ifndef FPUEMUL
294		KASSERT(p == ci->ci_fpuproc);
295		save_fpu();
296#endif
297		update_pcb = 1;
298
299		sig = fpu_emulate(tf, insn, &sv);
300		/* reload fsr, possibly modified by softfloat code */
301		fsr = tf->fsr;
302		if (sig == 0) {
303			/* raise SIGFPE if necessary */
304			excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
305			excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
306			if (excbits != 0)
307				sig = SIGFPE;
308		}
309	} else {
310		sig = SIGILL;
311		fault_type = ILL_ILLOPC;
312	}
313
314deliver:
315	switch (sig) {
316	case SIGFPE:
317		excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
318		excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
319		if (excbits & FP_X_INV)
320			fault_type = FPE_FLTINV;
321		else if (excbits & FP_X_DZ)
322			fault_type = FPE_INTDIV;
323		else if (excbits & FP_X_OFL)
324			fault_type = FPE_FLTUND;
325		else if (excbits & FP_X_UFL)
326			fault_type = FPE_FLTOVF;
327		else /* if (excbits & FP_X_IMP) */
328			fault_type = FPE_FLTRES;
329
330		break;
331#ifdef FPUEMUL
332	case SIGBUS:
333		fault_type = BUS_ADRALN;
334		break;
335	case SIGSEGV:
336		fault_type = SEGV_MAPERR;
337		break;
338#endif
339	}
340
341	/*
342	 * Skip the instruction, unless we are delivering SIGILL.
343	 */
344#ifdef FPUEMUL
345	if (skip_insn) {
346#endif
347		if (sig != SIGILL) {
348			if (tf->cause & CR_BR_DELAY) {
349				/*
350				 * Note that it doesn't matter, at this point,
351				 * that we pass the updated FSR value, as it is
352				 * only used to decide whether to branch or not
353				 * if the faulting instruction was BC1[FT].
354				 */
355				tf->pc = MipsEmulateBranch(tf, tf->pc, fsr, 0);
356			} else
357				tf->pc += 4;
358		}
359#ifdef FPUEMUL
360	}
361#endif
362
363	/*
364	 * Update the FPU status register.
365	 * We need to make sure that this will not cause an exception
366	 * in kernel mode.
367	 */
368
369	/* propagate raised exceptions to the sticky bits */
370	fsr &= ~FPCSR_C_E;
371	excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
372	fsr |= excbits << FPCSR_F_SHIFT;
373	/* clear all exception sources */
374	fsr &= ~FPCSR_C_MASK;
375	if (update_pcb)
376		tf->fsr = fsr;
377#ifndef FPUEMUL
378	__asm__ __volatile__ ("ctc1 %0, $31" :: "r" (fsr));
379	/* disable fpu before returning to trap() */
380	setsr(sr);
381#endif
382
383	if (sig != 0) {
384#ifdef FPUEMUL
385		if (sig != SIGBUS && sig != SIGSEGV)
386#endif
387			sv.sival_ptr = (void *)pc;
388		trapsignal(p, sig, 0, fault_type, sv);
389	}
390}
391
/*
 * Emulate an FPU instruction.  The FPU register set has been saved in the
 * current PCB, and is pointed to by the trap frame.
 *
 * The instruction word `insn' is dispatched on its major opcode (and,
 * for COP1 and COP1X, on its rs or op4 field) to the routine able to
 * emulate it.  `sv' is only handed to the nofpu_* routines.
 *
 * Returns zero on success, or the number of the signal to deliver;
 * SIGILL is returned if no routine claims the instruction.
 */
int
fpu_emulate(struct trap_frame *tf, uint32_t insn, union sigval *sv)
{
	InstFmt inst;

	tf->zero = 0;	/* not written by trap code */

	inst = *(InstFmt *)&insn;
	switch (inst.FRType.op) {
	default:
		break;
#ifdef FPUEMUL
	case OP_SPECIAL:
		return nofpu_emulate_movci(tf, insn);
	case OP_LDC1:
	case OP_LWC1:
	case OP_SDC1:
	case OP_SWC1:
		return nofpu_emulate_loadstore(tf, insn, sv);
#endif
	case OP_COP1:
		switch (inst.RType.rs) {
#ifdef FPUEMUL
		/* register moves and branches, only emulated without FPU */
		case OP_MF:
		case OP_DMF:
		case OP_CF:
		case OP_MT:
		case OP_DMT:
		case OP_CT:
		case OP_BC:
			return nofpu_emulate_cop1(tf, insn, sv);
#endif
		default:
			/* arithmetic, conversion and comparison operations */
			return fpu_emulate_cop1(tf, insn);
		}
		break;
	case OP_COP1X:
		switch (inst.FQType.op4) {
#ifdef FPUEMUL
		default:
			/* indexed loads and stores, only emulated without FPU */
			switch (inst.FRType.func) {
			case OP_LDXC1:
			case OP_LWXC1:
			case OP_SDXC1:
			case OP_SWXC1:
			case OP_PREFX:
				return nofpu_emulate_cop1x(tf, insn, sv);
			default:
				break;
			}
			break;
		case OP_MADD:
		case OP_MSUB:
		case OP_NMADD:
		case OP_NMSUB:
			return fpu_emulate_cop1x(tf, insn);
#else
		default:
			/* fpu_emulate_cop1x() rejects unknown op4 values */
			return fpu_emulate_cop1x(tf, insn);
#endif
		}
	}

	return SIGILL;
}
461
/*
 * Emulate a COP1 FPU instruction.
 *
 * The function code selects a handler from a 64-entry table; the
 * instruction is rejected with SIGILL if there is no handler, if the
 * format field is not one of S, D, W, L, or if an odd register number
 * is used while the FR bit is clear.  Otherwise, returns whatever the
 * handler returns.
 */
int
fpu_emulate_cop1(struct trap_frame *tf, uint32_t insn)
{
	InstFmt inst;
	uint ft, fs, fd;
	fpu_fn3 *fpu_op;
	/* handlers, indexed by function code; NULL means invalid */
	static fpu_fn3 *const fpu_ops1[1 << 6] = {
		fpu_add,		/* 0x00 */
		fpu_sub,
		fpu_mul,
		fpu_div,
		fpu_sqrt,
		fpu_abs,
		fpu_mov,
		fpu_neg,
		fpu_round_l,		/* 0x08 */
		fpu_trunc_l,
		fpu_ceil_l,
		fpu_floor_l,
		fpu_round_w,
		fpu_trunc_w,
		fpu_ceil_w,
		fpu_floor_w,
		NULL,			/* 0x10 */
		fpu_movcf,
		fpu_movz,
		fpu_movn,
		NULL,
		fpu_recip,
		fpu_rsqrt,
		NULL,
		NULL,			/* 0x18 */
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		fpu_cvt_s,		/* 0x20 */
		fpu_cvt_d,
		NULL,
		NULL,
		fpu_cvt_w,
		fpu_cvt_l,
		NULL,
		NULL,
		NULL,			/* 0x28 */
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		NULL,
		/* all sixteen c.cond function codes share one handler */
		(fpu_fn3 *)fpu_c,	/* 0x30 */
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,	/* 0x38 */
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c,
		(fpu_fn3 *)fpu_c
	};

	inst = *(InstFmt *)&insn;

	/*
	 * Check for valid function code.
	 */

	fpu_op = fpu_ops1[inst.FRType.func];
	if (fpu_op == NULL)
		return SIGILL;

	/*
	 * Check for valid format.  FRType assumes bit 25 is always set,
	 * so we need to check for it explicitly.
	 */

	if ((insn & (1 << 25)) == 0)
		return SIGILL;
	switch (inst.FRType.fmt) {
	default:
		return SIGILL;
	case FMT_S:
	case FMT_D:
	case FMT_W:
	case FMT_L:
		break;
	}

	/*
	 * Check for valid register values. Only even-numbered registers
	 * can be used if the FR bit is clear in coprocessor 0 status
	 * register.
	 *
	 * Note that c.cond does not specify a register number in the fd
	 * field, but the fd field must have zero in its low two bits, so
	 * the test will not reject valid c.cond instructions.
	 *
	 * NOTE(review): fpu_movcf() takes its true/false selector from the
	 * low bit of ft, and fpu_movz()/fpu_movn() use ft as an index into
	 * the general registers; this test looks like it would reject such
	 * instructions when FR is clear and that value is odd - confirm.
	 */

	ft = inst.FRType.ft;
	fs = inst.FRType.fs;
	fd = inst.FRType.fd;
	if ((tf->sr & SR_FR_32) == 0) {
		if ((ft | fs | fd) & 1)
			return SIGILL;
	}

	/*
	 * Finally dispatch to the proper routine.
	 */

	/* fpu_c needs the function code to know which predicate to test */
	if (fpu_op == (fpu_fn3 *)&fpu_c)
		return fpu_c(tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func);
	else
		return (*fpu_op)(tf, inst.FRType.fmt, ft, fs, fd);
}
592
593/*
594 * Emulate a COP1X FPU instruction.
595 */
596int
597fpu_emulate_cop1x(struct trap_frame *tf, uint32_t insn)
598{
599	InstFmt inst;
600	uint fr, ft, fs, fd;
601	fpu_fn4 *fpu_op;
602	static fpu_fn4 *const fpu_ops1x[1 << 3] = {
603		NULL,
604		NULL,
605		NULL,
606		NULL,
607		fpu_madd,
608		fpu_msub,
609		fpu_nmadd,
610		fpu_nmsub
611	};
612
613	inst = *(InstFmt *)&insn;
614
615	/*
616	 * Check for valid function code.
617	 */
618
619	fpu_op = fpu_ops1x[inst.FQType.op4];
620	if (fpu_op == NULL)
621		return SIGILL;
622
623	/*
624	 * Check for valid format.
625	 */
626
627	switch (inst.FQType.fmt3) {
628	default:
629		return SIGILL;
630	case FMT_S:
631	case FMT_D:
632	case FMT_W:
633	case FMT_L:
634		break;
635	}
636
637	/*
638	 * Check for valid register values. Only even-numbered registers
639	 * can be used if the FR bit is clear in coprocessor 0 status
640	 * register.
641	 */
642
643	fr = inst.FQType.fr;
644	ft = inst.FQType.ft;
645	fs = inst.FQType.fs;
646	fd = inst.FQType.fd;
647	if ((tf->sr & SR_FR_32) == 0) {
648		if ((fr | ft | fs | fd) & 1)
649			return SIGILL;
650	}
651
652	/*
653	 * Finally dispatch to the proper routine.
654	 */
655
656	return (*fpu_op)(tf, inst.FRType.fmt, fr, ft, fs, fd);
657}
658
659/*
660 * Load a floating-point argument according to the specified format.
661 */
662uint64_t
663fpu_load(struct trap_frame *tf, uint fmt, uint regno)
664{
665	register_t *regs = (register_t *)tf;
666	uint64_t tmp, tmp2;
667
668	tmp = (uint64_t)regs[FPBASE + regno];
669	if (tf->sr & SR_FR_32) {
670		switch (fmt) {
671		case FMT_D:
672		case FMT_L:
673			break;
674		case FMT_S:
675		case FMT_W:
676			tmp &= 0xffffffff;
677			break;
678		}
679	} else {
680		tmp &= 0xffffffff;
681		switch (fmt) {
682		case FMT_D:
683		case FMT_L:
684			/* caller has enforced regno is even */
685			tmp2 = (uint64_t)regs[FPBASE + regno + 1];
686			tmp |= tmp2 << 32;
687			break;
688		case FMT_S:
689		case FMT_W:
690			break;
691		}
692	}
693
694	return tmp;
695}
696
697/*
698 * Store a floating-point result according to the specified format.
699 */
700void
701fpu_store(struct trap_frame *tf, uint fmt, uint regno, uint64_t rslt)
702{
703	register_t *regs = (register_t *)tf;
704
705	if (tf->sr & SR_FR_32) {
706		regs[FPBASE + regno] = rslt;
707	} else {
708		/* caller has enforced regno is even */
709		regs[FPBASE + regno] = rslt & 0xffffffff;
710		regs[FPBASE + regno + 1] = (rslt >> 32) & 0xffffffff;
711	}
712}
713
714/*
715 * Integer conversion
716 */
717
718int
719fpu_int_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
720{
721	uint64_t raw;
722	uint32_t oldrm;
723
724	if (ft != 0)
725		return SIGILL;
726	if (fmt != FMT_S && fmt != FMT_D)
727		return SIGILL;
728
729	raw = fpu_load(tf, fmt, fs);
730
731	/* round towards required mode */
732	oldrm = tf->fsr & FPCSR_RM_MASK;
733	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
734	if (fmt == FMT_S)
735		raw = float32_to_int64((float32)raw);
736	else
737		raw = float64_to_int64((float64)raw);
738	/* restore rounding mode */
739	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
740
741	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
742		fpu_store(tf, fmt, fd, raw);
743
744	return 0;
745}
746
747int
748fpu_int_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint rm)
749{
750	uint64_t raw;
751	uint32_t oldrm;
752
753	if (ft != 0)
754		return SIGILL;
755	if (fmt != FMT_S && fmt != FMT_D)
756		return SIGILL;
757
758	raw = fpu_load(tf, fmt, fs);
759
760	/* round towards required mode */
761	oldrm = tf->fsr & FPCSR_RM_MASK;
762	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
763	if (fmt == FMT_S)
764		raw = float32_to_int32((float32)raw);
765	else
766		raw = float64_to_int32((float64)raw);
767	/* restore rounding mode */
768	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
769
770	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
771		fpu_store(tf, fmt, fd, raw);
772
773	return 0;
774}
775
776/*
777 * FPU Instruction emulation
778 */
779
780int
781fpu_abs(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
782{
783	uint64_t raw;
784
785	if (ft != 0)
786		return SIGILL;
787	if (fmt != FMT_S && fmt != FMT_D)
788		return SIGILL;
789
790	raw = fpu_load(tf, fmt, fs);
791	/* clear sign bit unless NaN */
792	if (fmt == FMT_S) {
793		float32 f32 = (float32)raw;
794		if (float32_is_nan(f32)) {
795			float_set_invalid();
796		} else {
797			f32 &= ~(1L << 31);
798			raw = (uint64_t)f32;
799		}
800	} else {
801		float64 f64 = (float64)raw;
802		if (float64_is_nan(f64)) {
803			float_set_invalid();
804		} else {
805			f64 &= ~(1L << 63);
806			raw = (uint64_t)f64;
807		}
808	}
809	fpu_store(tf, fmt, fd, raw);
810
811	return 0;
812}
813
814int
815fpu_add(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
816{
817	uint64_t raw1, raw2, rslt;
818
819	if (fmt != FMT_S && fmt != FMT_D)
820		return SIGILL;
821
822	raw1 = fpu_load(tf, fmt, fs);
823	raw2 = fpu_load(tf, fmt, ft);
824	if (fmt == FMT_S) {
825		float32 f32 = float32_add((float32)raw1, (float32)raw2);
826		rslt = (uint64_t)f32;
827	} else {
828		float64 f64 = float64_add((float64)raw1, (float64)raw2);
829		rslt = (uint64_t)f64;
830	}
831	fpu_store(tf, fmt, fd, rslt);
832
833	return 0;
834}
835
836int
837fpu_c(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd, uint op)
838{
839	uint64_t raw1, raw2;
840	uint cc, lt, eq, uo;
841
842	if ((fd & 0x03) != 0)
843		return SIGILL;
844	if (fmt != FMT_S && fmt != FMT_D)
845		return SIGILL;
846
847	lt = eq = uo = 0;
848	cc = fd >> 2;
849
850	raw1 = fpu_load(tf, fmt, fs);
851	raw2 = fpu_load(tf, fmt, ft);
852
853	if (fmt == FMT_S) {
854		float32 f32a = (float32)raw1;
855		float32 f32b = (float32)raw2;
856		if (float32_is_nan(f32a)) {
857			uo = 1 << 0;
858			if (float32_is_signaling_nan(f32a))
859				op |= 0x08;	/* force invalid exception */
860		}
861		if (float32_is_nan(f32b)) {
862			uo = 1 << 0;
863			if (float32_is_signaling_nan(f32b))
864				op |= 0x08;	/* force invalid exception */
865		}
866		if (uo == 0) {
867			if (float32_eq(f32a, f32b))
868				eq = 1 << 1;
869			else if (float32_lt(f32a, f32b))
870				lt = 1 << 2;
871		}
872	} else {
873		float64 f64a = (float64)raw1;
874		float64 f64b = (float64)raw2;
875		if (float64_is_nan(f64a)) {
876			uo = 1 << 0;
877			if (float64_is_signaling_nan(f64a))
878				op |= 0x08;	/* force invalid exception */
879		}
880		if (float64_is_nan(f64b)) {
881			uo = 1 << 0;
882			if (float64_is_signaling_nan(f64b))
883				op |= 0x08;	/* force invalid exception */
884		}
885		if (uo == 0) {
886			if (float64_eq(f64a, f64b))
887				eq = 1 << 1;
888			else if (float64_lt(f64a, f64b))
889				lt = 1 << 2;
890		}
891	}
892
893	if (uo && (op & 0x08)) {
894		float_set_invalid();
895		if (tf->fsr & FPCSR_E_V) {
896			/* comparison result intentionaly not written */
897			goto skip;
898		}
899	} else {
900		if ((uo | eq | lt) & op)
901			tf->fsr |= FPCSR_CONDVAL(cc);
902		else
903			tf->fsr &= ~FPCSR_CONDVAL(cc);
904	}
905skip:
906
907	return 0;
908}
909
/*
 * ceil.l.fmt: convert fs to a 64-bit integer in fd, rounding up.
 * Operand checks and the conversion itself are done by fpu_int_l().
 */
int
fpu_ceil_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
{
	/* round towards positive infinity */
	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RP);
}
916
/*
 * ceil.w.fmt: convert fs to a 32-bit integer in fd, rounding up.
 * Operand checks and the conversion itself are done by fpu_int_w().
 */
int
fpu_ceil_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
{
	/* round towards positive infinity */
	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RP);
}
923
924int
925fpu_cvt_d(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
926{
927	uint64_t raw;
928
929	if (ft != 0)
930		return SIGILL;
931	if (fmt == FMT_D)
932		return SIGILL;
933
934	raw = fpu_load(tf, fmt, fs);
935	switch (fmt) {
936	case FMT_L:
937		raw = int64_to_float64((int64_t)raw);
938		break;
939	case FMT_S:
940		raw = float32_to_float64((float32)raw);
941		break;
942	case FMT_W:
943		raw = int32_to_float64((int32_t)raw);
944		break;
945	}
946	fpu_store(tf, fmt, fd, raw);
947
948	return 0;
949}
950
951int
952fpu_cvt_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
953{
954	uint64_t raw;
955	uint32_t rm;
956
957	if (ft != 0)
958		return SIGILL;
959	if (fmt != FMT_S && fmt != FMT_D)
960		return SIGILL;
961
962	rm = tf->fsr & FPCSR_RM_MASK;
963	raw = fpu_load(tf, fmt, fs);
964	if (fmt == FMT_D) {
965		if (rm == FP_RZ)
966			raw = float64_to_int64_round_to_zero((float64)raw);
967		else
968			raw = float64_to_int64((float64)raw);
969	} else {
970		if (rm == FP_RZ)
971			raw = float32_to_int64_round_to_zero((float32)raw);
972		else
973			raw = float32_to_int64((float32)raw);
974	}
975	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
976		fpu_store(tf, fmt, fd, raw);
977
978	return 0;
979}
980
981int
982fpu_cvt_s(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
983{
984	uint64_t raw;
985
986	if (ft != 0)
987		return SIGILL;
988	if (fmt == FMT_S)
989		return SIGILL;
990
991	raw = fpu_load(tf, fmt, fs);
992	switch (fmt) {
993	case FMT_D:
994		raw = float64_to_float32((float64)raw);
995		break;
996	case FMT_L:
997		raw = int64_to_float32((int64_t)raw);
998		break;
999	case FMT_W:
1000		raw = int32_to_float32((int32_t)raw);
1001		break;
1002	}
1003	fpu_store(tf, fmt, fd, raw);
1004
1005	return 0;
1006}
1007
1008int
1009fpu_cvt_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1010{
1011	uint64_t raw;
1012	uint32_t rm;
1013
1014	if (ft != 0)
1015		return SIGILL;
1016	if (fmt != FMT_S && fmt != FMT_D)
1017		return SIGILL;
1018
1019	rm = tf->fsr & FPCSR_RM_MASK;
1020	raw = fpu_load(tf, fmt, fs);
1021	if (fmt == FMT_D) {
1022		if (rm == FP_RZ)
1023			raw = float64_to_int32_round_to_zero((float64)raw);
1024		else
1025			raw = float64_to_int32((float64)raw);
1026	} else {
1027		if (rm == FP_RZ)
1028			raw = float32_to_int32_round_to_zero((float32)raw);
1029		else
1030			raw = float32_to_int32((float32)raw);
1031	}
1032	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1033		fpu_store(tf, fmt, fd, raw);
1034
1035	return 0;
1036}
1037
1038int
1039fpu_div(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1040{
1041	uint64_t raw1, raw2, rslt;
1042
1043	if (fmt != FMT_S && fmt != FMT_D)
1044		return SIGILL;
1045
1046	raw1 = fpu_load(tf, fmt, fs);
1047	raw2 = fpu_load(tf, fmt, ft);
1048	if (fmt == FMT_S) {
1049		float32 f32 = float32_div((float32)raw1, (float32)raw2);
1050		rslt = (uint64_t)f32;
1051	} else {
1052		float64 f64 = float64_div((float64)raw1, (float64)raw2);
1053		rslt = (uint64_t)f64;
1054	}
1055	fpu_store(tf, fmt, fd, rslt);
1056
1057	return 0;
1058}
1059
/*
 * floor.l.fmt: convert fs to a 64-bit integer in fd, rounding down.
 * Operand checks and the conversion itself are done by fpu_int_l().
 */
int
fpu_floor_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
{
	/* round towards negative infinity */
	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RM);
}
1066
/*
 * floor.w.fmt: convert fs to a 32-bit integer in fd, rounding down.
 * Operand checks and the conversion itself are done by fpu_int_w().
 */
int
fpu_floor_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
{
	/* round towards negative infinity */
	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RM);
}
1073
1074int
1075fpu_madd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1076{
1077	uint64_t raw1, raw2, raw3, rslt;
1078
1079	if (fmt != FMT_S && fmt != FMT_D)
1080		return SIGILL;
1081
1082	raw1 = fpu_load(tf, fmt, fs);
1083	raw2 = fpu_load(tf, fmt, ft);
1084	raw3 = fpu_load(tf, fmt, fr);
1085	if (fmt == FMT_S) {
1086		float32 f32 = float32_add(
1087		    float32_mul((float32)raw1, (float32)raw2),
1088		    (float32)raw3);
1089		rslt = (uint64_t)f32;
1090	} else {
1091		float64 f64 = float64_add(
1092		    float64_mul((float64)raw1, (float64)raw2),
1093		    (float64)raw3);
1094		rslt = (uint64_t)f64;
1095	}
1096	fpu_store(tf, fmt, fd, rslt);
1097
1098	return 0;
1099}
1100
1101int
1102fpu_mov(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1103{
1104	uint64_t raw;
1105
1106	if (ft != 0)
1107		return SIGILL;
1108	if (fmt != FMT_S && fmt != FMT_D)
1109		return SIGILL;
1110
1111	raw = fpu_load(tf, fmt, fs);
1112	fpu_store(tf, fmt, fd, raw);
1113
1114	return 0;
1115}
1116
1117int
1118fpu_movcf(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1119{
1120	uint64_t raw;
1121	uint cc, istf;
1122	int condition;
1123
1124	if ((ft & 0x02) != 0)
1125		return SIGILL;
1126	cc = ft >> 2;
1127	if (fmt != FMT_S && fmt != FMT_D)
1128		return SIGILL;
1129
1130	condition = tf->fsr & FPCSR_CONDVAL(cc);
1131	istf = ft & COPz_BC_TF_MASK;
1132	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1133		raw = fpu_load(tf, fmt, fs);
1134		fpu_store(tf, fmt, fd, raw);
1135	}
1136
1137	return 0;
1138}
1139
1140int
1141fpu_movn(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1142{
1143	register_t *regs = (register_t *)tf;
1144	uint64_t raw;
1145
1146	if (fmt != FMT_S && fmt != FMT_D)
1147		return SIGILL;
1148
1149	if (ft != ZERO && regs[ft] != 0) {
1150		raw = fpu_load(tf, fmt, fs);
1151		fpu_store(tf, fmt, fd, raw);
1152	}
1153
1154	return 0;
1155}
1156
1157int
1158fpu_movz(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1159{
1160	register_t *regs = (register_t *)tf;
1161	uint64_t raw;
1162
1163	if (fmt != FMT_S && fmt != FMT_D)
1164		return SIGILL;
1165
1166	if (ft == ZERO || regs[ft] == 0) {
1167		raw = fpu_load(tf, fmt, fs);
1168		fpu_store(tf, fmt, fd, raw);
1169	}
1170
1171	return 0;
1172}
1173
1174int
1175fpu_msub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1176{
1177	uint64_t raw1, raw2, raw3, rslt;
1178
1179	if (fmt != FMT_S && fmt != FMT_D)
1180		return SIGILL;
1181
1182	raw1 = fpu_load(tf, fmt, fs);
1183	raw2 = fpu_load(tf, fmt, ft);
1184	raw3 = fpu_load(tf, fmt, fr);
1185	if (fmt == FMT_S) {
1186		float32 f32 = float32_sub(
1187		    float32_mul((float32)raw1, (float32)raw2),
1188		    (float32)raw3);
1189		rslt = (uint64_t)f32;
1190	} else {
1191		float64 f64 = float64_sub(
1192		    float64_mul((float64)raw1, (float64)raw2),
1193		    (float64)raw3);
1194		rslt = (uint64_t)f64;
1195	}
1196	fpu_store(tf, fmt, fd, rslt);
1197
1198	return 0;
1199}
1200
1201int
1202fpu_mul(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1203{
1204	uint64_t raw1, raw2, rslt;
1205
1206	if (fmt != FMT_S && fmt != FMT_D)
1207		return SIGILL;
1208
1209	raw1 = fpu_load(tf, fmt, fs);
1210	raw2 = fpu_load(tf, fmt, ft);
1211	if (fmt == FMT_S) {
1212		float32 f32 = float32_mul((float32)raw1, (float32)raw2);
1213		rslt = (uint64_t)f32;
1214	} else {
1215		float64 f64 = float64_mul((float64)raw1, (float64)raw2);
1216		rslt = (uint64_t)f64;
1217	}
1218	fpu_store(tf, fmt, fd, rslt);
1219
1220	return 0;
1221}
1222
1223int
1224fpu_neg(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1225{
1226	uint64_t raw;
1227
1228	if (ft != 0)
1229		return SIGILL;
1230	if (fmt != FMT_S && fmt != FMT_D)
1231		return SIGILL;
1232
1233	raw = fpu_load(tf, fmt, fs);
1234	/* flip sign bit unless NaN */
1235	if (fmt == FMT_S) {
1236		float32 f32 = (float32)raw;
1237		if (float32_is_nan(f32)) {
1238			float_set_invalid();
1239		} else {
1240			f32 ^= 1L << 31;
1241			raw = (uint64_t)f32;
1242		}
1243	} else {
1244		float64 f64 = (float64)raw;
1245		if (float64_is_nan(f64)) {
1246			float_set_invalid();
1247		} else {
1248			f64 ^= 1L << 63;
1249			raw = (uint64_t)f64;
1250		}
1251	}
1252	fpu_store(tf, fmt, fd, raw);
1253
1254	return 0;
1255}
1256
1257int
1258fpu_nmadd(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1259{
1260	uint64_t raw1, raw2, raw3, rslt;
1261
1262	if (fmt != FMT_S && fmt != FMT_D)
1263		return SIGILL;
1264
1265	raw1 = fpu_load(tf, fmt, fs);
1266	raw2 = fpu_load(tf, fmt, ft);
1267	raw3 = fpu_load(tf, fmt, fr);
1268	if (fmt == FMT_S) {
1269		float32 f32 = float32_add(
1270		    float32_mul((float32)raw1, (float32)raw2),
1271		    (float32)raw3);
1272		if (float32_is_nan(f32))
1273			float_set_invalid();
1274		else
1275			f32 ^= 1L << 31;
1276		rslt = (uint64_t)f32;
1277	} else {
1278		float64 f64 = float64_add(
1279		    float64_mul((float64)raw1, (float64)raw2),
1280		    (float64)raw3);
1281		if (float64_is_nan(f64))
1282			float_set_invalid();
1283		else
1284			f64 ^= 1L << 63;
1285		rslt = (uint64_t)f64;
1286	}
1287	fpu_store(tf, fmt, fd, rslt);
1288
1289	return 0;
1290}
1291
1292int
1293fpu_nmsub(struct trap_frame *tf, uint fmt, uint fr, uint ft, uint fs, uint fd)
1294{
1295	uint64_t raw1, raw2, raw3, rslt;
1296
1297	if (fmt != FMT_S && fmt != FMT_D)
1298		return SIGILL;
1299
1300	raw1 = fpu_load(tf, fmt, fs);
1301	raw2 = fpu_load(tf, fmt, ft);
1302	raw3 = fpu_load(tf, fmt, fr);
1303	if (fmt == FMT_S) {
1304		float32 f32 = float32_sub(
1305		    float32_mul((float32)raw1, (float32)raw2),
1306		    (float32)raw3);
1307		if (float32_is_nan(f32))
1308			float_set_invalid();
1309		else
1310			f32 ^= 1L << 31;
1311		rslt = (uint64_t)f32;
1312	} else {
1313		float64 f64 = float64_sub(
1314		    float64_mul((float64)raw1, (float64)raw2),
1315		    (float64)raw3);
1316		if (float64_is_nan(f64))
1317			float_set_invalid();
1318		else
1319			f64 ^= 1L << 63;
1320		rslt = (uint64_t)f64;
1321	}
1322	fpu_store(tf, fmt, fd, rslt);
1323
1324	return 0;
1325}
1326
1327int
1328fpu_recip(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1329{
1330	uint64_t raw;
1331
1332	if (ft != 0)
1333		return SIGILL;
1334	if (fmt != FMT_S && fmt != FMT_D)
1335		return SIGILL;
1336
1337	raw = fpu_load(tf, fmt, fs);
1338	if (fmt == FMT_S) {
1339		float32 f32 = float32_div(ONE_F32, (float32)raw);
1340		raw = (uint64_t)f32;
1341	} else {
1342		float64 f64 = float64_div(ONE_F64, (float64)raw);
1343		raw = (uint64_t)f64;
1344	}
1345	fpu_store(tf, fmt, fd, raw);
1346
1347	return 0;
1348}
1349
/*
 * round.l.fmt: convert fs to a 64-bit integer in fd, rounding to nearest.
 * Operand checks and the conversion itself are done by fpu_int_l().
 */
int
fpu_round_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
{
	/* round towards nearest */
	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RN);
}
1356
/*
 * round.w.fmt: convert fs to a 32-bit integer in fd, rounding to nearest.
 * Operand checks and the conversion itself are done by fpu_int_w().
 */
int
fpu_round_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
{
	/* round towards nearest */
	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RN);
}
1363
1364int
1365fpu_rsqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1366{
1367	uint64_t raw;
1368
1369	if (ft != 0)
1370		return SIGILL;
1371	if (fmt != FMT_S && fmt != FMT_D)
1372		return SIGILL;
1373
1374	raw = fpu_load(tf, fmt, fs);
1375	if (fmt == FMT_S) {
1376		float32 f32 = float32_sqrt((float32)raw);
1377		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1378		    (FPCSR_C_V | FPCSR_E_V))
1379			f32 = float32_div(ONE_F32, f32);
1380		raw = (uint64_t)f32;
1381	} else {
1382		float64 f64 = float64_sqrt((float64)raw);
1383		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1384		    (FPCSR_C_V | FPCSR_E_V))
1385			f64 = float64_div(ONE_F64, f64);
1386		raw = (uint64_t)f64;
1387	}
1388	fpu_store(tf, fmt, fd, raw);
1389
1390	return 0;
1391}
1392
1393int
1394fpu_sqrt(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1395{
1396	uint64_t raw;
1397
1398	if (ft != 0)
1399		return SIGILL;
1400	if (fmt != FMT_S && fmt != FMT_D)
1401		return SIGILL;
1402
1403	raw = fpu_load(tf, fmt, fs);
1404	if (fmt == FMT_S) {
1405		float32 f32 = float32_sqrt((float32)raw);
1406		raw = (uint64_t)f32;
1407	} else {
1408		float64 f64 = float64_sqrt((float64)raw);
1409		raw = (uint64_t)f64;
1410	}
1411	fpu_store(tf, fmt, fd, raw);
1412
1413	return 0;
1414}
1415
1416int
1417fpu_sub(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1418{
1419	uint64_t raw1, raw2, rslt;
1420
1421	if (fmt != FMT_S && fmt != FMT_D)
1422		return SIGILL;
1423
1424	raw1 = fpu_load(tf, fmt, fs);
1425	raw2 = fpu_load(tf, fmt, ft);
1426	if (fmt == FMT_S) {
1427		float32 f32 = float32_sub((float32)raw1, (float32)raw2);
1428		rslt = (uint64_t)f32;
1429	} else {
1430		float64 f64 = float64_sub((float64)raw1, (float64)raw2);
1431		rslt = (uint64_t)f64;
1432	}
1433	fpu_store(tf, fmt, fd, rslt);
1434
1435	return 0;
1436}
1437
1438int
1439fpu_trunc_l(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1440{
1441	/* round towards zero */
1442	return fpu_int_l(tf, fmt, ft, fs, fd, FP_RZ);
1443}
1444
1445int
1446fpu_trunc_w(struct trap_frame *tf, uint fmt, uint ft, uint fs, uint fd)
1447{
1448	/* round towards zero */
1449	return fpu_int_w(tf, fmt, ft, fs, fd, FP_RZ);
1450}
1451
1452#ifdef FPUEMUL
1453
1454/*
1455 * Emulate a COP1 non-FPU instruction.
1456 */
1457int
1458nofpu_emulate_cop1(struct trap_frame *tf, uint32_t insn, union sigval *sv)
1459{
1460	register_t *regs = (register_t *)tf;
1461	InstFmt inst;
1462	int32_t cval;
1463
1464	inst = *(InstFmt *)&insn;
1465
1466	switch (inst.RType.rs) {
1467	case OP_MF:
1468		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1469			return SIGILL;
1470		if (inst.FRType.ft != ZERO)
1471			regs[inst.FRType.ft] =
1472			    (int32_t)regs[FPBASE + inst.FRType.fs];
1473		break;
1474	case OP_DMF:
1475		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1476			return SIGILL;
1477		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1478			if (inst.FRType.ft != ZERO)
1479				regs[inst.FRType.ft] =
1480				    fpu_load(tf, FMT_L, inst.FRType.fs);
1481		}
1482		break;
1483	case OP_CF:
1484		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1485			return SIGILL;
1486		if (inst.FRType.ft != ZERO) {
1487			switch (inst.FRType.fs) {
1488			case 0:	/* FPC_ID */
1489				cval = MIPS_SOFT << 8;
1490				break;
1491			case 31: /* FPC_CSR */
1492				cval = (int32_t)tf->fsr;
1493				break;
1494			default:
1495				cval = 0;
1496				break;
1497			}
1498			regs[inst.FRType.ft] = (int64_t)cval;
1499		}
1500		break;
1501	case OP_MT:
1502		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1503			return SIGILL;
1504		regs[FPBASE + inst.FRType.fs] = (int32_t)regs[inst.FRType.ft];
1505		break;
1506	case OP_DMT:
1507		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1508			return SIGILL;
1509		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1510			fpu_store(tf, FMT_L, inst.FRType.fs,
1511			    regs[inst.FRType.ft]);
1512		}
1513		break;
1514	case OP_CT:
1515		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1516			return SIGILL;
1517		cval = (int32_t)regs[inst.FRType.ft];
1518		switch (inst.FRType.fs) {
1519		case 31: /* FPC_CSR */
1520			cval &= ~FPCSR_C_E;
1521			tf->fsr = cval;
1522			break;
1523		case 0:	/* FPC_ID */
1524		default:
1525			break;
1526		}
1527		break;
1528	case OP_BC:
1529	   {
1530		uint cc, nd, istf;
1531		int condition;
1532		vaddr_t dest;
1533		uint32_t dinsn;
1534
1535		cc = (inst.RType.rt & COPz_BC_CC_MASK) >> COPz_BC_CC_SHIFT;
1536		nd = inst.RType.rt & COPz_BCL_TF_MASK;
1537		istf = inst.RType.rt & COPz_BC_TF_MASK;
1538		condition = tf->fsr & FPCSR_CONDVAL(cc);
1539		if ((!condition && !istf) /*bc1f*/ ||
1540		    (condition && istf) /*bc1t*/) {
1541			/*
1542			 * Branch taken: if the delay slot is not a nop,
1543			 * copy the delay slot instruction to the dedicated
1544			 * relocation page, in order to be able to have the
1545			 * cpu process it and give control back to the
1546			 * kernel, for us to redirect to the branch
1547			 * destination.
1548			 */
1549			/* inline MipsEmulateBranch(tf, tf->pc, tf->fsr, insn)*/
1550			dest = tf->pc + 4 + ((short)inst.IType.imm << 2);
1551			if (copyin((const void *)(tf->pc + 4), &dinsn,
1552			    sizeof dinsn)) {
1553				sv->sival_ptr = (void *)(tf->pc + 4);
1554				return SIGSEGV;
1555			}
1556			if (dinsn == 0x00000000 /* nop */ ||
1557			    dinsn == 0x00000040 /* ssnop */) {
1558				tf->pc = dest;
1559			} else {
1560				if (fpe_branch_emulate(curproc, tf, dinsn,
1561				    dest) != 0)
1562					return SIGILL;
1563			}
1564		} else {
1565			/*
1566			 * Branch not taken: skip the instruction, and
1567			 * skip the delay slot if it was a `branch likely'
1568			 * instruction.
1569			 */
1570			tf->pc += 4;
1571			if (nd)
1572				tf->pc += 4;
1573		}
1574	    }
1575		break;
1576	}
1577
1578	return 0;
1579}
1580
1581/*
1582 * Emulate a COP1X non-FPU instruction.
1583 */
1584int
1585nofpu_emulate_cop1x(struct trap_frame *tf, uint32_t insn, union sigval *sv)
1586{
1587	register_t *regs = (register_t *)tf;
1588	InstFmt inst;
1589	vaddr_t va;
1590	uint64_t ddata;
1591	uint32_t wdata;
1592
1593	inst = *(InstFmt *)&insn;
1594	switch (inst.FRType.func) {
1595	case OP_LDXC1:
1596		if (inst.FQType.fs != 0)
1597			return SIGILL;
1598		va = (vaddr_t)regs[inst.FQType.fr] +
1599		    (vaddr_t)regs[inst.FQType.ft];
1600		if ((va & 0x07) != 0) {
1601			sv->sival_ptr = (void *)va;
1602			return SIGBUS;
1603		}
1604		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1605			sv->sival_ptr = (void *)va;
1606			return SIGSEGV;
1607		}
1608		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fd & 1) == 0)
1609			fpu_store(tf, FMT_L, inst.FQType.fd, ddata);
1610		break;
1611	case OP_LWXC1:
1612		if (inst.FQType.fs != 0)
1613			return SIGILL;
1614		va = (vaddr_t)regs[inst.FQType.fr] +
1615		    (vaddr_t)regs[inst.FQType.ft];
1616		if ((va & 0x03) != 0) {
1617			sv->sival_ptr = (void *)va;
1618			return SIGBUS;
1619		}
1620#ifdef __MIPSEB__
1621		va ^= 4;
1622#endif
1623		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1624			sv->sival_ptr = (void *)va;
1625			return SIGSEGV;
1626		}
1627		regs[FPBASE + inst.FQType.fd] = wdata;
1628		break;
1629	case OP_SDXC1:
1630		if (inst.FQType.fd != 0)
1631			return SIGILL;
1632		va = (vaddr_t)regs[inst.FQType.fr] +
1633		    (vaddr_t)regs[inst.FQType.ft];
1634		if ((va & 0x07) != 0) {
1635			sv->sival_ptr = (void *)va;
1636			return SIGBUS;
1637		}
1638		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fs & 1) == 0)
1639			ddata = fpu_load(tf, FMT_L, inst.FQType.fs);
1640		else {
1641			/* undefined behaviour, don't expose stack content */
1642			ddata = 0;
1643		}
1644		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1645			sv->sival_ptr = (void *)va;
1646			return SIGSEGV;
1647		}
1648		break;
1649	case OP_SWXC1:
1650		if (inst.FQType.fd != 0)
1651			return SIGILL;
1652		va = (vaddr_t)regs[inst.FQType.fr] +
1653		    (vaddr_t)regs[inst.FQType.ft];
1654		if ((va & 0x03) != 0) {
1655			sv->sival_ptr = (void *)va;
1656			return SIGBUS;
1657		}
1658#ifdef __MIPSEB__
1659		va ^= 4;
1660#endif
1661		wdata = regs[FPBASE + inst.FQType.fs];
1662		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1663			sv->sival_ptr = (void *)va;
1664			return SIGSEGV;
1665		}
1666		break;
1667	case OP_PREFX:
1668		/* nothing to do */
1669		break;
1670	}
1671
1672	return 0;
1673}
1674
1675/*
1676 * Emulate a load/store instruction on FPU registers.
1677 */
1678int
1679nofpu_emulate_loadstore(struct trap_frame *tf, uint32_t insn, union sigval *sv)
1680{
1681	register_t *regs = (register_t *)tf;
1682	InstFmt inst;
1683	vaddr_t va;
1684	uint64_t ddata;
1685	uint32_t wdata;
1686
1687	inst = *(InstFmt *)&insn;
1688	switch (inst.IType.op) {
1689	case OP_LDC1:
1690		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1691		if ((va & 0x07) != 0) {
1692			sv->sival_ptr = (void *)va;
1693			return SIGBUS;
1694		}
1695		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1696			sv->sival_ptr = (void *)va;
1697			return SIGSEGV;
1698		}
1699		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1700			fpu_store(tf, FMT_L, inst.IType.rt, ddata);
1701		break;
1702	case OP_LWC1:
1703		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1704		if ((va & 0x03) != 0) {
1705			sv->sival_ptr = (void *)va;
1706			return SIGBUS;
1707		}
1708#ifdef __MIPSEB__
1709		va ^= 4;
1710#endif
1711		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1712			sv->sival_ptr = (void *)va;
1713			return SIGSEGV;
1714		}
1715		regs[FPBASE + inst.IType.rt] = wdata;
1716		break;
1717	case OP_SDC1:
1718		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1719		if ((va & 0x07) != 0) {
1720			sv->sival_ptr = (void *)va;
1721			return SIGBUS;
1722		}
1723		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1724			ddata = fpu_load(tf, FMT_L, inst.IType.rt);
1725		else {
1726			/* undefined behaviour, don't expose stack content */
1727			ddata = 0;
1728		}
1729		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1730			sv->sival_ptr = (void *)va;
1731			return SIGSEGV;
1732		}
1733		break;
1734	case OP_SWC1:
1735		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1736		if ((va & 0x03) != 0) {
1737			sv->sival_ptr = (void *)va;
1738			return SIGBUS;
1739		}
1740#ifdef __MIPSEB__
1741		va ^= 4;
1742#endif
1743		wdata = regs[FPBASE + inst.IType.rt];
1744		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1745			sv->sival_ptr = (void *)va;
1746			return SIGSEGV;
1747		}
1748		break;
1749	}
1750
1751	return 0;
1752}
1753
1754/*
1755 * Emulate MOVF and MOVT.
1756 */
1757int
1758nofpu_emulate_movci(struct trap_frame *tf, uint32_t insn)
1759{
1760	register_t *regs = (register_t *)tf;
1761	InstFmt inst;
1762	uint cc, istf;
1763	int condition;
1764
1765	inst = *(InstFmt *)&insn;
1766	if ((inst.RType.rt & 0x02) != 0 || inst.RType.shamt != 0)
1767		return SIGILL;
1768
1769	cc = inst.RType.rt >> 2;
1770	istf = inst.RType.rt & COPz_BC_TF_MASK;
1771	condition = tf->fsr & FPCSR_CONDVAL(cc);
1772	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1773		if (inst.RType.rd != ZERO)
1774			regs[inst.RType.rd] = regs[inst.RType.rs];
1775	}
1776
1777	return 0;
1778}
1779
1780#endif	/* FPUEMUL */
1781