fp_emulate.c revision 1.16
1/*	$OpenBSD: fp_emulate.c,v 1.16 2017/08/26 15:21:48 visa Exp $	*/
2
3/*
4 * Copyright (c) 2010 Miodrag Vallat.
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19/*
20 * Floating Point completion/emulation code (MI softfloat code control engine).
21 *
22 * Supports all MIPS IV COP1 and COP1X floating-point instructions.
23 *
24 * Floating-point load and store instructions, as well as branch instructions,
25 * are only handled if the kernel is compiled with option FPUEMUL.
26 */
27
28#include <sys/param.h>
29#include <sys/systm.h>
30#include <sys/kernel.h>
31#include <sys/signalvar.h>
32
33#include <machine/cpu.h>
34#include <mips64/mips_cpu.h>
35#include <machine/fpu.h>
36#include <machine/frame.h>
37#include <machine/ieee.h>
38#include <machine/ieeefp.h>
39#include <machine/mips_opcode.h>
40#include <machine/regnum.h>
41
42#include <lib/libkern/softfloat.h>
43#if defined(DEBUG) && defined(DDB)
44#include <machine/db_machdep.h>
45#endif
46
47int	fpu_emulate(struct proc *, struct trapframe *, uint32_t,
48	    union sigval *);
49int	fpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t);
50int	fpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t);
51uint64_t
52	fpu_load(struct proc *, struct trapframe *, uint, uint);
53void	fpu_store(struct proc *, struct trapframe *, uint, uint, uint64_t);
54#ifdef FPUEMUL
55int	nofpu_emulate_cop1(struct proc *, struct trapframe *, uint32_t,
56	    union sigval *);
57int	nofpu_emulate_cop1x(struct proc *, struct trapframe *, uint32_t,
58	    union sigval *);
59int	nofpu_emulate_loadstore(struct proc *, struct trapframe *, uint32_t,
60	    union sigval *);
61int	nofpu_emulate_movci(struct trapframe *, uint32_t);
62#endif
63
64typedef	int (fpu_fn3)(struct proc *, struct trapframe *, uint, uint, uint,
65	    uint);
66typedef	int (fpu_fn4)(struct proc *, struct trapframe *, uint, uint, uint,
67	    uint, uint);
68fpu_fn3	fpu_abs;
69fpu_fn3	fpu_add;
70int	fpu_c(struct proc *, struct trapframe *, uint, uint, uint, uint, uint);
71fpu_fn3	fpu_ceil_l;
72fpu_fn3	fpu_ceil_w;
73fpu_fn3	fpu_cvt_d;
74fpu_fn3	fpu_cvt_l;
75fpu_fn3	fpu_cvt_s;
76fpu_fn3	fpu_cvt_w;
77fpu_fn3	fpu_div;
78fpu_fn3	fpu_floor_l;
79fpu_fn3	fpu_floor_w;
80int	fpu_int_l(struct proc *, struct trapframe *, uint, uint, uint, uint,
81	    uint);
82int	fpu_int_w(struct proc *, struct trapframe *, uint, uint, uint, uint,
83	    uint);
84fpu_fn4	fpu_madd;
85fpu_fn4	fpu_msub;
86fpu_fn3	fpu_mov;
87fpu_fn3	fpu_movcf;
88fpu_fn3	fpu_movn;
89fpu_fn3	fpu_movz;
90fpu_fn3	fpu_mul;
91fpu_fn3	fpu_neg;
92fpu_fn4	fpu_nmadd;
93fpu_fn4	fpu_nmsub;
94fpu_fn3	fpu_recip;
95fpu_fn3	fpu_round_l;
96fpu_fn3	fpu_round_w;
97fpu_fn3	fpu_rsqrt;
98fpu_fn3	fpu_sqrt;
99fpu_fn3	fpu_sub;
100fpu_fn3	fpu_trunc_l;
101fpu_fn3	fpu_trunc_w;
102
/*
 * Operand format encodings, as found in the `fmt' and `fmt3' fields of
 * the opcodes.
 */
#define	FMT_S	0x00	/* 32-bit floating-point value */
#define	FMT_D	0x01	/* 64-bit floating-point value */
#define	FMT_W	0x04	/* 32-bit integer value */
#define	FMT_L	0x05	/* 64-bit integer value */
110
/*
 * Inlines from softfloat-specialize.h which are not made public, needed
 * by the NaN checks of the emulation routines in this file (fpu_abs,
 * fpu_c, fpu_neg, fpu_nmadd and fpu_nmsub).
 *
 * The argument is the raw 32-bit encoding of the value.  It is fully
 * parenthesized and narrowed to 32 bits so that any integer expression
 * can be passed safely; note that float32_is_signaling_nan() evaluates
 * its argument twice.
 */
#define	float32_is_nan(a) \
	(0xff000000 < (uint32_t)((uint32_t)(a) << 1))
#define	float32_is_signaling_nan(a) \
	(((((uint32_t)(a) >> 22) & 0x1ff) == 0x1fe) && \
	    (((uint32_t)(a) & 0x003fffff) != 0))
119
/*
 * Precomputed results of intXX_to_floatXX(1): a biased exponent of zero
 * and an empty fraction.  The expansions are fully parenthesized so that
 * they behave as a single operand wherever they are used.
 */
#define	ONE_F32	((float32)(SNG_EXP_BIAS << SNG_FRACBITS))
#define	ONE_F64	((float64)((uint64_t)DBL_EXP_BIAS << DBL_FRACBITS))
125
126/*
127 * Handle a floating-point exception.
128 */
129void
130MipsFPTrap(struct trapframe *tf)
131{
132	struct cpu_info *ci = curcpu();
133	struct proc *p = ci->ci_curproc;
134	union sigval sv;
135	vaddr_t pc;
136	uint32_t fsr, excbits;
137	uint32_t branch = 0;
138	uint32_t insn;
139	InstFmt inst;
140	int sig = 0;
141	int fault_type = SI_NOINFO;
142	int update_pcb = 0;
143	int emulate = 0;
144#ifdef FPUEMUL
145	int skip_insn = 1;
146#else
147	register_t sr;
148#endif
149
150	KDASSERT(tf == p->p_md.md_regs);
151
152	pc = (vaddr_t)tf->pc;
153	if (tf->cause & CR_BR_DELAY)
154		pc += 4;
155
156#ifndef FPUEMUL
157	/*
158	 * Enable FPU, and read its status register.
159	 */
160
161	sr = getsr();
162	setsr(sr | SR_COP_1_BIT);
163
164	__asm__ volatile ("cfc1 %0, $31" : "=r" (fsr));
165	__asm__ volatile ("cfc1 %0, $31" : "=r" (fsr));
166
167	/*
168	 * If this is not an unimplemented operation, but a genuine
169	 * FPU exception, signal the process.
170	 */
171
172	if ((fsr & FPCSR_C_E) == 0) {
173		sig = SIGFPE;
174		goto deliver;
175	}
176#else
177#ifdef CPU_OCTEON
178	/*
179	 * SR_FR_32 is hardwired to zero on Octeon; make sure it is
180	 * set in the emulation view of the FPU state.
181	 */
182	tf->sr |= SR_FR_32;
183#endif
184#endif	/* FPUEMUL */
185
186	/*
187	 * Get the faulting instruction.  This should not fail, and
188	 * if it does, it's probably not your lucky day.
189	 */
190
191	if (copyin((void *)pc, &insn, sizeof insn) != 0) {
192		sig = SIGBUS;
193		fault_type = BUS_OBJERR;
194		goto deliver;
195	}
196	inst = *(InstFmt *)&insn;
197
198	if (tf->cause & CR_BR_DELAY) {
199		if (copyin32((const void *)tf->pc, &branch) != 0) {
200			sig = SIGBUS;
201			fault_type = BUS_OBJERR;
202			sv.sival_ptr = (void *)tf->pc;
203			goto deliver;
204		}
205	}
206
207	/*
208	 * Emulate the instruction.
209	 */
210
211#ifdef DEBUG
212#ifdef DDB
213	printf("%s: unimplemented FPU completion, fsr 0x%08x\n0x%lx: ",
214	    p->p_p->ps_comm, fsr, pc);
215	dbmd_print_insn(insn, pc, printf);
216#else
217	printf("%s: unimplemented FPU completion, insn 0x%08x fsr 0x%08x\n",
218	    p->p_p->ps_comm, insn, fsr);
219#endif
220#endif
221
222	switch (inst.FRType.op) {
223	default:
224		/*
225		 * Not a FPU instruction.
226		 */
227		break;
228#ifdef FPUEMUL
229	case OP_SPECIAL:
230		switch (inst.FRType.func) {
231		default:
232			/*
233			 * Not a FPU instruction.
234			 */
235			break;
236		case OP_MOVCI:
237			/*
238			 * This instruction should not require emulation,
239			 * unless there is no FPU.
240			 */
241			emulate = 1;
242			break;
243		}
244		break;
245	case OP_LDC1:
246	case OP_LWC1:
247	case OP_SDC1:
248	case OP_SWC1:
249		/*
250		 * These instructions should not require emulation,
251		 * unless there is no FPU.
252		 */
253		emulate = 1;
254		break;
255#endif
256	case OP_COP1:
257		switch (inst.RType.rs) {
258		case OP_BC:
259#ifdef FPUEMUL
260			skip_insn = 0;
261#endif
262			/* FALLTHROUGH */
263		case OP_MF:
264		case OP_DMF:
265		case OP_CF:
266		case OP_MT:
267		case OP_DMT:
268		case OP_CT:
269			/*
270			 * These instructions should not require emulation,
271			 * unless there is no FPU.
272			 */
273#ifdef FPUEMUL
274			emulate = 1;
275#endif
276			break;
277		default:
278			emulate = 1;
279			break;
280		}
281		break;
282	case OP_COP1X:
283		switch (inst.FQType.op4) {
284		default:
285			switch (inst.FRType.func) {
286#ifdef FPUEMUL
287			case OP_LDXC1:
288			case OP_LWXC1:
289			case OP_SDXC1:
290			case OP_SWXC1:
291			case OP_PREFX:
292				/*
293				 * These instructions should not require
294				 * emulation, unless there is no FPU.
295				 */
296				emulate = 1;
297				break;
298#endif
299			default:
300				/*
301				 * Not a valid instruction.
302				 */
303				break;
304			}
305			break;
306		case OP_MADD:
307		case OP_MSUB:
308		case OP_NMADD:
309		case OP_NMSUB:
310			emulate = 1;
311			break;
312		}
313		break;
314	}
315
316	if (emulate) {
317#ifndef FPUEMUL
318		KASSERT(p == ci->ci_fpuproc);
319		save_fpu();
320#endif
321		update_pcb = 1;
322
323		sig = fpu_emulate(p, tf, insn, &sv);
324		/* reload fsr, possibly modified by softfloat code */
325		fsr = tf->fsr;
326		if (sig == 0) {
327			/* raise SIGFPE if necessary */
328			excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
329			excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
330			if (excbits != 0)
331				sig = SIGFPE;
332		}
333	} else {
334		sig = SIGILL;
335		fault_type = ILL_ILLOPC;
336	}
337
338deliver:
339	switch (sig) {
340	case SIGFPE:
341		excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
342		excbits &= (fsr & FPCSR_E_MASK) >> FPCSR_E_SHIFT;
343		if (excbits & FP_X_INV)
344			fault_type = FPE_FLTINV;
345		else if (excbits & FP_X_DZ)
346			fault_type = FPE_INTDIV;
347		else if (excbits & FP_X_OFL)
348			fault_type = FPE_FLTUND;
349		else if (excbits & FP_X_UFL)
350			fault_type = FPE_FLTOVF;
351		else /* if (excbits & FP_X_IMP) */
352			fault_type = FPE_FLTRES;
353
354		break;
355#ifdef FPUEMUL
356	case SIGBUS:
357		fault_type = BUS_ADRALN;
358		break;
359	case SIGSEGV:
360		fault_type = SEGV_MAPERR;
361		break;
362#endif
363	}
364
365	/*
366	 * Skip the instruction, unless we are delivering SIGILL.
367	 */
368#ifdef FPUEMUL
369	if (skip_insn) {
370#endif
371		if (sig != SIGILL) {
372			if (tf->cause & CR_BR_DELAY) {
373				/*
374				 * Note that it doesn't matter, at this point,
375				 * that we pass the updated FSR value, as it is
376				 * only used to decide whether to branch or not
377				 * if the faulting instruction was BC1[FT].
378				 */
379				tf->pc = MipsEmulateBranch(tf, tf->pc, fsr,
380				    branch);
381			} else
382				tf->pc += 4;
383		}
384#ifdef FPUEMUL
385	}
386#endif
387
388	/*
389	 * Update the FPU status register.
390	 * We need to make sure that this will not cause an exception
391	 * in kernel mode.
392	 */
393
394	/* propagate raised exceptions to the sticky bits */
395	fsr &= ~FPCSR_C_E;
396	excbits = (fsr & FPCSR_C_MASK) >> FPCSR_C_SHIFT;
397	fsr |= excbits << FPCSR_F_SHIFT;
398	/* clear all exception sources */
399	fsr &= ~FPCSR_C_MASK;
400	if (update_pcb)
401		tf->fsr = fsr;
402#ifndef FPUEMUL
403	__asm__ volatile ("ctc1 %0, $31" :: "r" (fsr));
404	/* disable fpu before returning to trap() */
405	setsr(sr);
406#endif
407
408	if (sig != 0) {
409#ifdef FPUEMUL
410		if (sig != SIGBUS && sig != SIGSEGV)
411#endif
412			sv.sival_ptr = (void *)pc;
413		KERNEL_LOCK();
414		trapsignal(p, sig, 0, fault_type, sv);
415		KERNEL_UNLOCK();
416	}
417}
418
419/*
420 * Emulate an FPU instruction.  The FPU register set has been saved in the
421 * current PCB, and is pointed to by the trap frame.
422 */
423int
424fpu_emulate(struct proc *p, struct trapframe *tf, uint32_t insn,
425    union sigval *sv)
426{
427	InstFmt inst;
428
429	tf->zero = 0;	/* not written by trap code */
430
431	inst = *(InstFmt *)&insn;
432	switch (inst.FRType.op) {
433	default:
434		break;
435#ifdef FPUEMUL
436	case OP_SPECIAL:
437		return nofpu_emulate_movci(tf, insn);
438	case OP_LDC1:
439	case OP_LWC1:
440	case OP_SDC1:
441	case OP_SWC1:
442		return nofpu_emulate_loadstore(p, tf, insn, sv);
443#endif
444	case OP_COP1:
445		switch (inst.RType.rs) {
446#ifdef FPUEMUL
447		case OP_MF:
448		case OP_DMF:
449		case OP_CF:
450		case OP_MT:
451		case OP_DMT:
452		case OP_CT:
453		case OP_BC:
454			return nofpu_emulate_cop1(p, tf, insn, sv);
455#endif
456		default:
457			return fpu_emulate_cop1(p, tf, insn);
458		}
459		break;
460	case OP_COP1X:
461		switch (inst.FQType.op4) {
462#ifdef FPUEMUL
463		default:
464			switch (inst.FRType.func) {
465			case OP_LDXC1:
466			case OP_LWXC1:
467			case OP_SDXC1:
468			case OP_SWXC1:
469			case OP_PREFX:
470				return nofpu_emulate_cop1x(p, tf, insn, sv);
471			default:
472				break;
473			}
474			break;
475		case OP_MADD:
476		case OP_MSUB:
477		case OP_NMADD:
478		case OP_NMSUB:
479			return fpu_emulate_cop1x(p, tf, insn);
480#else
481		default:
482			return fpu_emulate_cop1x(p, tf, insn);
483#endif
484		}
485	}
486
487	return SIGILL;
488}
489
490/*
491 * Emulate a COP1 FPU instruction.
492 */
493int
494fpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn)
495{
496	InstFmt inst;
497	uint ft, fs, fd;
498	fpu_fn3 *fpu_op;
499	static fpu_fn3 *const fpu_ops1[1 << 6] = {
500		fpu_add,		/* 0x00 */
501		fpu_sub,
502		fpu_mul,
503		fpu_div,
504		fpu_sqrt,
505		fpu_abs,
506		fpu_mov,
507		fpu_neg,
508		fpu_round_l,		/* 0x08 */
509		fpu_trunc_l,
510		fpu_ceil_l,
511		fpu_floor_l,
512		fpu_round_w,
513		fpu_trunc_w,
514		fpu_ceil_w,
515		fpu_floor_w,
516		NULL,			/* 0x10 */
517		fpu_movcf,
518		fpu_movz,
519		fpu_movn,
520		NULL,
521		fpu_recip,
522		fpu_rsqrt,
523		NULL,
524		NULL,			/* 0x18 */
525		NULL,
526		NULL,
527		NULL,
528		NULL,
529		NULL,
530		NULL,
531		NULL,
532		fpu_cvt_s,		/* 0x20 */
533		fpu_cvt_d,
534		NULL,
535		NULL,
536		fpu_cvt_w,
537		fpu_cvt_l,
538		NULL,
539		NULL,
540		NULL,			/* 0x28 */
541		NULL,
542		NULL,
543		NULL,
544		NULL,
545		NULL,
546		NULL,
547		NULL,
548		(fpu_fn3 *)fpu_c,	/* 0x30 */
549		(fpu_fn3 *)fpu_c,
550		(fpu_fn3 *)fpu_c,
551		(fpu_fn3 *)fpu_c,
552		(fpu_fn3 *)fpu_c,
553		(fpu_fn3 *)fpu_c,
554		(fpu_fn3 *)fpu_c,
555		(fpu_fn3 *)fpu_c,
556		(fpu_fn3 *)fpu_c,	/* 0x38 */
557		(fpu_fn3 *)fpu_c,
558		(fpu_fn3 *)fpu_c,
559		(fpu_fn3 *)fpu_c,
560		(fpu_fn3 *)fpu_c,
561		(fpu_fn3 *)fpu_c,
562		(fpu_fn3 *)fpu_c,
563		(fpu_fn3 *)fpu_c
564	};
565
566	inst = *(InstFmt *)&insn;
567
568	/*
569	 * Check for valid function code.
570	 */
571
572	fpu_op = fpu_ops1[inst.FRType.func];
573	if (fpu_op == NULL)
574		return SIGILL;
575
576	/*
577	 * Check for valid format.  FRType assumes bit 25 is always set,
578	 * so we need to check for it explicitely.
579	 */
580
581	if ((insn & (1 << 25)) == 0)
582		return SIGILL;
583	switch (inst.FRType.fmt) {
584	default:
585		return SIGILL;
586	case FMT_S:
587	case FMT_D:
588	case FMT_W:
589	case FMT_L:
590		break;
591	}
592
593	/*
594	 * Check for valid register values. Only even-numbered registers
595	 * can be used if the FR bit is clear in coprocessor 0 status
596	 * register.
597	 *
598	 * Note that c.cond does not specify a register number in the fd
599	 * field, but the fd field must have zero in its low two bits, so
600	 * the test will not reject valid c.cond instructions.
601	 */
602
603	ft = inst.FRType.ft;
604	fs = inst.FRType.fs;
605	fd = inst.FRType.fd;
606	if ((tf->sr & SR_FR_32) == 0) {
607		if ((ft | fs | fd) & 1)
608			return SIGILL;
609	}
610
611	/*
612	 * Finally dispatch to the proper routine.
613	 */
614
615	if (fpu_op == (fpu_fn3 *)&fpu_c)
616		return
617		    fpu_c(p, tf, inst.FRType.fmt, ft, fs, fd, inst.FRType.func);
618	else
619		return (*fpu_op)(p, tf, inst.FRType.fmt, ft, fs, fd);
620}
621
622/*
623 * Emulate a COP1X FPU instruction.
624 */
625int
626fpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn)
627{
628	InstFmt inst;
629	uint fr, ft, fs, fd;
630	fpu_fn4 *fpu_op;
631	static fpu_fn4 *const fpu_ops1x[1 << 3] = {
632		NULL,
633		NULL,
634		NULL,
635		NULL,
636		fpu_madd,
637		fpu_msub,
638		fpu_nmadd,
639		fpu_nmsub
640	};
641
642	inst = *(InstFmt *)&insn;
643
644	/*
645	 * Check for valid function code.
646	 */
647
648	fpu_op = fpu_ops1x[inst.FQType.op4];
649	if (fpu_op == NULL)
650		return SIGILL;
651
652	/*
653	 * Check for valid format.
654	 */
655
656	switch (inst.FQType.fmt3) {
657	default:
658		return SIGILL;
659	case FMT_S:
660	case FMT_D:
661	case FMT_W:
662	case FMT_L:
663		break;
664	}
665
666	/*
667	 * Check for valid register values. Only even-numbered registers
668	 * can be used if the FR bit is clear in coprocessor 0 status
669	 * register.
670	 */
671
672	fr = inst.FQType.fr;
673	ft = inst.FQType.ft;
674	fs = inst.FQType.fs;
675	fd = inst.FQType.fd;
676	if ((tf->sr & SR_FR_32) == 0) {
677		if ((fr | ft | fs | fd) & 1)
678			return SIGILL;
679	}
680
681	/*
682	 * Finally dispatch to the proper routine.
683	 */
684
685	return (*fpu_op)(p, tf, inst.FRType.fmt, fr, ft, fs, fd);
686}
687
688/*
689 * Load a floating-point argument according to the specified format.
690 */
691uint64_t
692fpu_load(struct proc *p, struct trapframe *tf, uint fmt, uint regno)
693{
694	uint64_t tmp, tmp2;
695
696	tmp = ((uint64_t *)p->p_md.md_regs)[FPBASE + regno];
697	if (tf->sr & SR_FR_32) {
698		switch (fmt) {
699		case FMT_D:
700		case FMT_L:
701			break;
702		case FMT_S:
703		case FMT_W:
704			tmp &= 0xffffffff;
705			break;
706		}
707	} else {
708		tmp &= 0xffffffff;
709		switch (fmt) {
710		case FMT_D:
711		case FMT_L:
712			/* caller has enforced regno is even */
713			tmp2 =
714			    ((uint64_t *)p->p_md.md_regs)[FPBASE + regno + 1];
715			tmp |= tmp2 << 32;
716			break;
717		case FMT_S:
718		case FMT_W:
719			break;
720		}
721	}
722
723	return tmp;
724}
725
726/*
727 * Store a floating-point result according to the specified format.
728 */
729void
730fpu_store(struct proc *p, struct trapframe *tf, uint fmt, uint regno,
731    uint64_t rslt)
732{
733	if (tf->sr & SR_FR_32) {
734		((uint64_t *)p->p_md.md_regs)[FPBASE + regno] = rslt;
735	} else {
736		/* caller has enforced regno is even */
737		((uint64_t *)p->p_md.md_regs)[FPBASE + regno] =
738		    rslt & 0xffffffff;
739		((uint64_t *)p->p_md.md_regs)[FPBASE + regno + 1] =
740		    (rslt >> 32) & 0xffffffff;
741	}
742}
743
744/*
745 * Integer conversion
746 */
747
748int
749fpu_int_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
750    uint fd, uint rm)
751{
752	uint64_t raw;
753	uint32_t oldrm;
754
755	if (ft != 0)
756		return SIGILL;
757	if (fmt != FMT_S && fmt != FMT_D)
758		return SIGILL;
759
760	raw = fpu_load(p, tf, fmt, fs);
761
762	/* round towards required mode */
763	oldrm = tf->fsr & FPCSR_RM_MASK;
764	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
765	if (fmt == FMT_S)
766		raw = float32_to_int64((float32)raw);
767	else
768		raw = float64_to_int64((float64)raw);
769	/* restore rounding mode */
770	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
771
772	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
773		fpu_store(p, tf, fmt, fd, raw);
774
775	return 0;
776}
777
778int
779fpu_int_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
780    uint fd, uint rm)
781{
782	uint64_t raw;
783	uint32_t oldrm;
784
785	if (ft != 0)
786		return SIGILL;
787	if (fmt != FMT_S && fmt != FMT_D)
788		return SIGILL;
789
790	raw = fpu_load(p, tf, fmt, fs);
791
792	/* round towards required mode */
793	oldrm = tf->fsr & FPCSR_RM_MASK;
794	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | rm;
795	if (fmt == FMT_S)
796		raw = float32_to_int32((float32)raw);
797	else
798		raw = float64_to_int32((float64)raw);
799	/* restore rounding mode */
800	tf->fsr = (tf->fsr & ~FPCSR_RM_MASK) | oldrm;
801
802	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
803		fpu_store(p, tf, fmt, fd, raw);
804
805	return 0;
806}
807
808/*
809 * FPU Instruction emulation
810 */
811
812int
813fpu_abs(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
814    uint fd)
815{
816	uint64_t raw;
817
818	if (ft != 0)
819		return SIGILL;
820	if (fmt != FMT_S && fmt != FMT_D)
821		return SIGILL;
822
823	raw = fpu_load(p, tf, fmt, fs);
824	/* clear sign bit unless NaN */
825	if (fmt == FMT_S) {
826		float32 f32 = (float32)raw;
827		if (float32_is_nan(f32)) {
828			float_set_invalid();
829		} else {
830			f32 &= ~(1L << 31);
831			raw = (uint64_t)f32;
832		}
833	} else {
834		float64 f64 = (float64)raw;
835		if (float64_is_nan(f64)) {
836			float_set_invalid();
837		} else {
838			f64 &= ~(1L << 63);
839			raw = (uint64_t)f64;
840		}
841	}
842	fpu_store(p, tf, fmt, fd, raw);
843
844	return 0;
845}
846
847int
848fpu_add(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
849    uint fd)
850{
851	uint64_t raw1, raw2, rslt;
852
853	if (fmt != FMT_S && fmt != FMT_D)
854		return SIGILL;
855
856	raw1 = fpu_load(p, tf, fmt, fs);
857	raw2 = fpu_load(p, tf, fmt, ft);
858	if (fmt == FMT_S) {
859		float32 f32 = float32_add((float32)raw1, (float32)raw2);
860		rslt = (uint64_t)f32;
861	} else {
862		float64 f64 = float64_add((float64)raw1, (float64)raw2);
863		rslt = (uint64_t)f64;
864	}
865	fpu_store(p, tf, fmt, fd, rslt);
866
867	return 0;
868}
869
870int
871fpu_c(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
872    uint fd, uint op)
873{
874	uint64_t raw1, raw2;
875	uint cc, lt, eq, uo;
876
877	if ((fd & 0x03) != 0)
878		return SIGILL;
879	if (fmt != FMT_S && fmt != FMT_D)
880		return SIGILL;
881
882	lt = eq = uo = 0;
883	cc = fd >> 2;
884
885	raw1 = fpu_load(p, tf, fmt, fs);
886	raw2 = fpu_load(p, tf, fmt, ft);
887
888	if (fmt == FMT_S) {
889		float32 f32a = (float32)raw1;
890		float32 f32b = (float32)raw2;
891		if (float32_is_nan(f32a)) {
892			uo = 1 << 0;
893			if (float32_is_signaling_nan(f32a))
894				op |= 0x08;	/* force invalid exception */
895		}
896		if (float32_is_nan(f32b)) {
897			uo = 1 << 0;
898			if (float32_is_signaling_nan(f32b))
899				op |= 0x08;	/* force invalid exception */
900		}
901		if (uo == 0) {
902			if (float32_eq(f32a, f32b))
903				eq = 1 << 1;
904			else if (float32_lt(f32a, f32b))
905				lt = 1 << 2;
906		}
907	} else {
908		float64 f64a = (float64)raw1;
909		float64 f64b = (float64)raw2;
910		if (float64_is_nan(f64a)) {
911			uo = 1 << 0;
912			if (float64_is_signaling_nan(f64a))
913				op |= 0x08;	/* force invalid exception */
914		}
915		if (float64_is_nan(f64b)) {
916			uo = 1 << 0;
917			if (float64_is_signaling_nan(f64b))
918				op |= 0x08;	/* force invalid exception */
919		}
920		if (uo == 0) {
921			if (float64_eq(f64a, f64b))
922				eq = 1 << 1;
923			else if (float64_lt(f64a, f64b))
924				lt = 1 << 2;
925		}
926	}
927
928	if (uo && (op & 0x08)) {
929		float_set_invalid();
930		if (tf->fsr & FPCSR_E_V) {
931			/* comparison result intentionaly not written */
932			goto skip;
933		}
934	} else {
935		if ((uo | eq | lt) & op)
936			tf->fsr |= FPCSR_CONDVAL(cc);
937		else
938			tf->fsr &= ~FPCSR_CONDVAL(cc);
939	}
940skip:
941
942	return 0;
943}
944
945int
946fpu_ceil_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
947    uint fd)
948{
949	/* round towards positive infinity */
950	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RP);
951}
952
953int
954fpu_ceil_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
955    uint fd)
956{
957	/* round towards positive infinity */
958	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RP);
959}
960
961int
962fpu_cvt_d(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
963    uint fd)
964{
965	uint64_t raw;
966
967	if (ft != 0)
968		return SIGILL;
969	if (fmt == FMT_D)
970		return SIGILL;
971
972	raw = fpu_load(p, tf, fmt, fs);
973	switch (fmt) {
974	case FMT_L:
975		raw = int64_to_float64((int64_t)raw);
976		break;
977	case FMT_S:
978		raw = float32_to_float64((float32)raw);
979		break;
980	case FMT_W:
981		raw = int32_to_float64((int32_t)raw);
982		break;
983	}
984	fpu_store(p, tf, fmt, fd, raw);
985
986	return 0;
987}
988
989int
990fpu_cvt_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
991    uint fd)
992{
993	uint64_t raw;
994	uint32_t rm;
995
996	if (ft != 0)
997		return SIGILL;
998	if (fmt != FMT_S && fmt != FMT_D)
999		return SIGILL;
1000
1001	rm = tf->fsr & FPCSR_RM_MASK;
1002	raw = fpu_load(p, tf, fmt, fs);
1003	if (fmt == FMT_D) {
1004		if (rm == FP_RZ)
1005			raw = float64_to_int64_round_to_zero((float64)raw);
1006		else
1007			raw = float64_to_int64((float64)raw);
1008	} else {
1009		if (rm == FP_RZ)
1010			raw = float32_to_int64_round_to_zero((float32)raw);
1011		else
1012			raw = float32_to_int64((float32)raw);
1013	}
1014	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1015		fpu_store(p, tf, fmt, fd, raw);
1016
1017	return 0;
1018}
1019
1020int
1021fpu_cvt_s(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1022    uint fd)
1023{
1024	uint64_t raw;
1025
1026	if (ft != 0)
1027		return SIGILL;
1028	if (fmt == FMT_S)
1029		return SIGILL;
1030
1031	raw = fpu_load(p, tf, fmt, fs);
1032	switch (fmt) {
1033	case FMT_D:
1034		raw = float64_to_float32((float64)raw);
1035		break;
1036	case FMT_L:
1037		raw = int64_to_float32((int64_t)raw);
1038		break;
1039	case FMT_W:
1040		raw = int32_to_float32((int32_t)raw);
1041		break;
1042	}
1043	fpu_store(p, tf, fmt, fd, raw);
1044
1045	return 0;
1046}
1047
1048int
1049fpu_cvt_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1050    uint fd)
1051{
1052	uint64_t raw;
1053	uint32_t rm;
1054
1055	if (ft != 0)
1056		return SIGILL;
1057	if (fmt != FMT_S && fmt != FMT_D)
1058		return SIGILL;
1059
1060	rm = tf->fsr & FPCSR_RM_MASK;
1061	raw = fpu_load(p, tf, fmt, fs);
1062	if (fmt == FMT_D) {
1063		if (rm == FP_RZ)
1064			raw = float64_to_int32_round_to_zero((float64)raw);
1065		else
1066			raw = float64_to_int32((float64)raw);
1067	} else {
1068		if (rm == FP_RZ)
1069			raw = float32_to_int32_round_to_zero((float32)raw);
1070		else
1071			raw = float32_to_int32((float32)raw);
1072	}
1073	if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) != (FPCSR_C_V | FPCSR_E_V))
1074		fpu_store(p, tf, fmt, fd, raw);
1075
1076	return 0;
1077}
1078
1079int
1080fpu_div(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1081    uint fd)
1082{
1083	uint64_t raw1, raw2, rslt;
1084
1085	if (fmt != FMT_S && fmt != FMT_D)
1086		return SIGILL;
1087
1088	raw1 = fpu_load(p, tf, fmt, fs);
1089	raw2 = fpu_load(p, tf, fmt, ft);
1090	if (fmt == FMT_S) {
1091		float32 f32 = float32_div((float32)raw1, (float32)raw2);
1092		rslt = (uint64_t)f32;
1093	} else {
1094		float64 f64 = float64_div((float64)raw1, (float64)raw2);
1095		rslt = (uint64_t)f64;
1096	}
1097	fpu_store(p, tf, fmt, fd, rslt);
1098
1099	return 0;
1100}
1101
1102int
1103fpu_floor_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1104    uint fd)
1105{
1106	/* round towards negative infinity */
1107	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RM);
1108}
1109
1110int
1111fpu_floor_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1112    uint fd)
1113{
1114	/* round towards negative infinity */
1115	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RM);
1116}
1117
1118int
1119fpu_madd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1120    uint fs, uint fd)
1121{
1122	uint64_t raw1, raw2, raw3, rslt;
1123
1124	if (fmt != FMT_S && fmt != FMT_D)
1125		return SIGILL;
1126
1127	raw1 = fpu_load(p, tf, fmt, fs);
1128	raw2 = fpu_load(p, tf, fmt, ft);
1129	raw3 = fpu_load(p, tf, fmt, fr);
1130	if (fmt == FMT_S) {
1131		float32 f32 = float32_add(
1132		    float32_mul((float32)raw1, (float32)raw2),
1133		    (float32)raw3);
1134		rslt = (uint64_t)f32;
1135	} else {
1136		float64 f64 = float64_add(
1137		    float64_mul((float64)raw1, (float64)raw2),
1138		    (float64)raw3);
1139		rslt = (uint64_t)f64;
1140	}
1141	fpu_store(p, tf, fmt, fd, rslt);
1142
1143	return 0;
1144}
1145
1146int
1147fpu_mov(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1148    uint fd)
1149{
1150	uint64_t raw;
1151
1152	if (ft != 0)
1153		return SIGILL;
1154	if (fmt != FMT_S && fmt != FMT_D)
1155		return SIGILL;
1156
1157	raw = fpu_load(p, tf, fmt, fs);
1158	fpu_store(p, tf, fmt, fd, raw);
1159
1160	return 0;
1161}
1162
1163int
1164fpu_movcf(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1165    uint fd)
1166{
1167	uint64_t raw;
1168	uint cc, istf;
1169	int condition;
1170
1171	if ((ft & 0x02) != 0)
1172		return SIGILL;
1173	cc = ft >> 2;
1174	if (fmt != FMT_S && fmt != FMT_D)
1175		return SIGILL;
1176
1177	condition = tf->fsr & FPCSR_CONDVAL(cc);
1178	istf = ft & COPz_BC_TF_MASK;
1179	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1180		raw = fpu_load(p, tf, fmt, fs);
1181		fpu_store(p, tf, fmt, fd, raw);
1182	}
1183
1184	return 0;
1185}
1186
1187int
1188fpu_movn(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1189    uint fd)
1190{
1191	register_t *regs = (register_t *)tf;
1192	uint64_t raw;
1193
1194	if (fmt != FMT_S && fmt != FMT_D)
1195		return SIGILL;
1196
1197	if (ft != ZERO && regs[ft] != 0) {
1198		raw = fpu_load(p, tf, fmt, fs);
1199		fpu_store(p, tf, fmt, fd, raw);
1200	}
1201
1202	return 0;
1203}
1204
1205int
1206fpu_movz(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1207    uint fd)
1208{
1209	register_t *regs = (register_t *)tf;
1210	uint64_t raw;
1211
1212	if (fmt != FMT_S && fmt != FMT_D)
1213		return SIGILL;
1214
1215	if (ft == ZERO || regs[ft] == 0) {
1216		raw = fpu_load(p, tf, fmt, fs);
1217		fpu_store(p, tf, fmt, fd, raw);
1218	}
1219
1220	return 0;
1221}
1222
1223int
1224fpu_msub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1225    uint fs, uint fd)
1226{
1227	uint64_t raw1, raw2, raw3, rslt;
1228
1229	if (fmt != FMT_S && fmt != FMT_D)
1230		return SIGILL;
1231
1232	raw1 = fpu_load(p, tf, fmt, fs);
1233	raw2 = fpu_load(p, tf, fmt, ft);
1234	raw3 = fpu_load(p, tf, fmt, fr);
1235	if (fmt == FMT_S) {
1236		float32 f32 = float32_sub(
1237		    float32_mul((float32)raw1, (float32)raw2),
1238		    (float32)raw3);
1239		rslt = (uint64_t)f32;
1240	} else {
1241		float64 f64 = float64_sub(
1242		    float64_mul((float64)raw1, (float64)raw2),
1243		    (float64)raw3);
1244		rslt = (uint64_t)f64;
1245	}
1246	fpu_store(p, tf, fmt, fd, rslt);
1247
1248	return 0;
1249}
1250
1251int
1252fpu_mul(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1253    uint fd)
1254{
1255	uint64_t raw1, raw2, rslt;
1256
1257	if (fmt != FMT_S && fmt != FMT_D)
1258		return SIGILL;
1259
1260	raw1 = fpu_load(p, tf, fmt, fs);
1261	raw2 = fpu_load(p, tf, fmt, ft);
1262	if (fmt == FMT_S) {
1263		float32 f32 = float32_mul((float32)raw1, (float32)raw2);
1264		rslt = (uint64_t)f32;
1265	} else {
1266		float64 f64 = float64_mul((float64)raw1, (float64)raw2);
1267		rslt = (uint64_t)f64;
1268	}
1269	fpu_store(p, tf, fmt, fd, rslt);
1270
1271	return 0;
1272}
1273
1274int
1275fpu_neg(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1276    uint fd)
1277{
1278	uint64_t raw;
1279
1280	if (ft != 0)
1281		return SIGILL;
1282	if (fmt != FMT_S && fmt != FMT_D)
1283		return SIGILL;
1284
1285	raw = fpu_load(p, tf, fmt, fs);
1286	/* flip sign bit unless NaN */
1287	if (fmt == FMT_S) {
1288		float32 f32 = (float32)raw;
1289		if (float32_is_nan(f32)) {
1290			float_set_invalid();
1291		} else {
1292			f32 ^= 1L << 31;
1293			raw = (uint64_t)f32;
1294		}
1295	} else {
1296		float64 f64 = (float64)raw;
1297		if (float64_is_nan(f64)) {
1298			float_set_invalid();
1299		} else {
1300			f64 ^= 1L << 63;
1301			raw = (uint64_t)f64;
1302		}
1303	}
1304	fpu_store(p, tf, fmt, fd, raw);
1305
1306	return 0;
1307}
1308
1309int
1310fpu_nmadd(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1311    uint fs, uint fd)
1312{
1313	uint64_t raw1, raw2, raw3, rslt;
1314
1315	if (fmt != FMT_S && fmt != FMT_D)
1316		return SIGILL;
1317
1318	raw1 = fpu_load(p, tf, fmt, fs);
1319	raw2 = fpu_load(p, tf, fmt, ft);
1320	raw3 = fpu_load(p, tf, fmt, fr);
1321	if (fmt == FMT_S) {
1322		float32 f32 = float32_add(
1323		    float32_mul((float32)raw1, (float32)raw2),
1324		    (float32)raw3);
1325		if (float32_is_nan(f32))
1326			float_set_invalid();
1327		else
1328			f32 ^= 1L << 31;
1329		rslt = (uint64_t)f32;
1330	} else {
1331		float64 f64 = float64_add(
1332		    float64_mul((float64)raw1, (float64)raw2),
1333		    (float64)raw3);
1334		if (float64_is_nan(f64))
1335			float_set_invalid();
1336		else
1337			f64 ^= 1L << 63;
1338		rslt = (uint64_t)f64;
1339	}
1340	fpu_store(p, tf, fmt, fd, rslt);
1341
1342	return 0;
1343}
1344
1345int
1346fpu_nmsub(struct proc *p, struct trapframe *tf, uint fmt, uint fr, uint ft,
1347    uint fs, uint fd)
1348{
1349	uint64_t raw1, raw2, raw3, rslt;
1350
1351	if (fmt != FMT_S && fmt != FMT_D)
1352		return SIGILL;
1353
1354	raw1 = fpu_load(p, tf, fmt, fs);
1355	raw2 = fpu_load(p, tf, fmt, ft);
1356	raw3 = fpu_load(p, tf, fmt, fr);
1357	if (fmt == FMT_S) {
1358		float32 f32 = float32_sub(
1359		    float32_mul((float32)raw1, (float32)raw2),
1360		    (float32)raw3);
1361		if (float32_is_nan(f32))
1362			float_set_invalid();
1363		else
1364			f32 ^= 1L << 31;
1365		rslt = (uint64_t)f32;
1366	} else {
1367		float64 f64 = float64_sub(
1368		    float64_mul((float64)raw1, (float64)raw2),
1369		    (float64)raw3);
1370		if (float64_is_nan(f64))
1371			float_set_invalid();
1372		else
1373			f64 ^= 1L << 63;
1374		rslt = (uint64_t)f64;
1375	}
1376	fpu_store(p, tf, fmt, fd, rslt);
1377
1378	return 0;
1379}
1380
1381int
1382fpu_recip(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1383    uint fd)
1384{
1385	uint64_t raw;
1386
1387	if (ft != 0)
1388		return SIGILL;
1389	if (fmt != FMT_S && fmt != FMT_D)
1390		return SIGILL;
1391
1392	raw = fpu_load(p, tf, fmt, fs);
1393	if (fmt == FMT_S) {
1394		float32 f32 = float32_div(ONE_F32, (float32)raw);
1395		raw = (uint64_t)f32;
1396	} else {
1397		float64 f64 = float64_div(ONE_F64, (float64)raw);
1398		raw = (uint64_t)f64;
1399	}
1400	fpu_store(p, tf, fmt, fd, raw);
1401
1402	return 0;
1403}
1404
1405int
1406fpu_round_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1407    uint fd)
1408{
1409	/* round towards nearest */
1410	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RN);
1411}
1412
1413int
1414fpu_round_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1415    uint fd)
1416{
1417	/* round towards nearest */
1418	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RN);
1419}
1420
1421int
1422fpu_rsqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1423    uint fd)
1424{
1425	uint64_t raw;
1426
1427	if (ft != 0)
1428		return SIGILL;
1429	if (fmt != FMT_S && fmt != FMT_D)
1430		return SIGILL;
1431
1432	raw = fpu_load(p, tf, fmt, fs);
1433	if (fmt == FMT_S) {
1434		float32 f32 = float32_sqrt((float32)raw);
1435		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1436		    (FPCSR_C_V | FPCSR_E_V))
1437			f32 = float32_div(ONE_F32, f32);
1438		raw = (uint64_t)f32;
1439	} else {
1440		float64 f64 = float64_sqrt((float64)raw);
1441		if ((tf->fsr & (FPCSR_C_V | FPCSR_E_V)) !=
1442		    (FPCSR_C_V | FPCSR_E_V))
1443			f64 = float64_div(ONE_F64, f64);
1444		raw = (uint64_t)f64;
1445	}
1446	fpu_store(p, tf, fmt, fd, raw);
1447
1448	return 0;
1449}
1450
1451int
1452fpu_sqrt(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1453    uint fd)
1454{
1455	uint64_t raw;
1456
1457	if (ft != 0)
1458		return SIGILL;
1459	if (fmt != FMT_S && fmt != FMT_D)
1460		return SIGILL;
1461
1462	raw = fpu_load(p, tf, fmt, fs);
1463	if (fmt == FMT_S) {
1464		float32 f32 = float32_sqrt((float32)raw);
1465		raw = (uint64_t)f32;
1466	} else {
1467		float64 f64 = float64_sqrt((float64)raw);
1468		raw = (uint64_t)f64;
1469	}
1470	fpu_store(p, tf, fmt, fd, raw);
1471
1472	return 0;
1473}
1474
1475int
1476fpu_sub(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1477    uint fd)
1478{
1479	uint64_t raw1, raw2, rslt;
1480
1481	if (fmt != FMT_S && fmt != FMT_D)
1482		return SIGILL;
1483
1484	raw1 = fpu_load(p, tf, fmt, fs);
1485	raw2 = fpu_load(p, tf, fmt, ft);
1486	if (fmt == FMT_S) {
1487		float32 f32 = float32_sub((float32)raw1, (float32)raw2);
1488		rslt = (uint64_t)f32;
1489	} else {
1490		float64 f64 = float64_sub((float64)raw1, (float64)raw2);
1491		rslt = (uint64_t)f64;
1492	}
1493	fpu_store(p, tf, fmt, fd, rslt);
1494
1495	return 0;
1496}
1497
1498int
1499fpu_trunc_l(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1500    uint fd)
1501{
1502	/* round towards zero */
1503	return fpu_int_l(p, tf, fmt, ft, fs, fd, FP_RZ);
1504}
1505
1506int
1507fpu_trunc_w(struct proc *p, struct trapframe *tf, uint fmt, uint ft, uint fs,
1508    uint fd)
1509{
1510	/* round towards zero */
1511	return fpu_int_w(p, tf, fmt, ft, fs, fd, FP_RZ);
1512}
1513
1514#ifdef FPUEMUL
1515
1516/*
1517 * Emulate a COP1 non-FPU instruction.
1518 */
1519int
1520nofpu_emulate_cop1(struct proc *p, struct trapframe *tf, uint32_t insn,
1521    union sigval *sv)
1522{
1523	register_t *regs = (register_t *)tf;
1524	InstFmt inst;
1525	int32_t cval;
1526
1527	inst = *(InstFmt *)&insn;
1528
1529	switch (inst.RType.rs) {
1530	case OP_MF:
1531		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1532			return SIGILL;
1533		if (inst.FRType.ft != ZERO)
1534			regs[inst.FRType.ft] = (int32_t)
1535			    ((uint64_t *)p->p_md.md_regs)
1536			      [FPBASE + inst.FRType.fs];
1537		break;
1538	case OP_DMF:
1539		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1540			return SIGILL;
1541		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1542			if (inst.FRType.ft != ZERO)
1543				regs[inst.FRType.ft] =
1544				    fpu_load(p, tf, FMT_L, inst.FRType.fs);
1545		}
1546		break;
1547	case OP_CF:
1548		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1549			return SIGILL;
1550		if (inst.FRType.ft != ZERO) {
1551			switch (inst.FRType.fs) {
1552			case 0:	/* FPC_ID */
1553				cval = MIPS_SOFT << 8;
1554				break;
1555			case 31: /* FPC_CSR */
1556				cval = (int32_t)tf->fsr;
1557				break;
1558			default:
1559				cval = 0;
1560				break;
1561			}
1562			regs[inst.FRType.ft] = (int64_t)cval;
1563		}
1564		break;
1565	case OP_MT:
1566		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1567			return SIGILL;
1568		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FRType.fs] =
1569		    (int32_t)regs[inst.FRType.ft];
1570		break;
1571	case OP_DMT:
1572		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1573			return SIGILL;
1574		if ((tf->sr & SR_FR_32) != 0 || (inst.FRType.fs & 1) == 0) {
1575			fpu_store(p, tf, FMT_L, inst.FRType.fs,
1576			    regs[inst.FRType.ft]);
1577		}
1578		break;
1579	case OP_CT:
1580		if (inst.FRType.fd != 0 || inst.FRType.func != 0)
1581			return SIGILL;
1582		cval = (int32_t)regs[inst.FRType.ft];
1583		switch (inst.FRType.fs) {
1584		case 31: /* FPC_CSR */
1585			cval &= ~FPCSR_C_E;
1586			tf->fsr = cval;
1587			break;
1588		case 0:	/* FPC_ID */
1589		default:
1590			break;
1591		}
1592		break;
1593	case OP_BC:
1594	   {
1595		uint cc, nd, istf;
1596		int condition;
1597		vaddr_t dest;
1598		uint32_t dinsn;
1599
1600		cc = (inst.RType.rt & COPz_BC_CC_MASK) >> COPz_BC_CC_SHIFT;
1601		nd = inst.RType.rt & COPz_BCL_TF_MASK;
1602		istf = inst.RType.rt & COPz_BC_TF_MASK;
1603		condition = tf->fsr & FPCSR_CONDVAL(cc);
1604		if ((!condition && !istf) /*bc1f*/ ||
1605		    (condition && istf) /*bc1t*/) {
1606			/*
1607			 * Branch taken: if the delay slot is not a nop,
1608			 * copy the delay slot instruction to the dedicated
1609			 * relocation page, in order to be able to have the
1610			 * cpu process it and give control back to the
1611			 * kernel, for us to redirect to the branch
1612			 * destination.
1613			 */
1614			/* inline MipsEmulateBranch(tf, tf->pc, tf->fsr, insn)*/
1615			dest = tf->pc + 4 + ((short)inst.IType.imm << 2);
1616			if (copyin((const void *)(tf->pc + 4), &dinsn,
1617			    sizeof dinsn)) {
1618				sv->sival_ptr = (void *)(tf->pc + 4);
1619				return SIGSEGV;
1620			}
1621			if (dinsn == 0x00000000 /* nop */ ||
1622			    dinsn == 0x00000040 /* ssnop */) {
1623				tf->pc = dest;
1624			} else {
1625				if (fpe_branch_emulate(curproc, tf, dinsn,
1626				    dest) != 0)
1627					return SIGILL;
1628			}
1629		} else {
1630			/*
1631			 * Branch not taken: skip the instruction, and
1632			 * skip the delay slot if it was a `branch likely'
1633			 * instruction.
1634			 */
1635			tf->pc += 4;
1636			if (nd)
1637				tf->pc += 4;
1638		}
1639	    }
1640		break;
1641	}
1642
1643	return 0;
1644}
1645
1646/*
1647 * Emulate a COP1X non-FPU instruction.
1648 */
1649int
1650nofpu_emulate_cop1x(struct proc *p, struct trapframe *tf, uint32_t insn,
1651    union sigval *sv)
1652{
1653	register_t *regs = (register_t *)tf;
1654	InstFmt inst;
1655	vaddr_t va;
1656	uint64_t ddata;
1657	uint32_t wdata;
1658
1659	inst = *(InstFmt *)&insn;
1660	switch (inst.FRType.func) {
1661	case OP_LDXC1:
1662		if (inst.FQType.fs != 0)
1663			return SIGILL;
1664		va = (vaddr_t)regs[inst.FQType.fr] +
1665		    (vaddr_t)regs[inst.FQType.ft];
1666		if ((va & 0x07) != 0) {
1667			sv->sival_ptr = (void *)va;
1668			return SIGBUS;
1669		}
1670		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1671			sv->sival_ptr = (void *)va;
1672			return SIGSEGV;
1673		}
1674		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fd & 1) == 0)
1675			fpu_store(p, tf, FMT_L, inst.FQType.fd, ddata);
1676		break;
1677	case OP_LWXC1:
1678		if (inst.FQType.fs != 0)
1679			return SIGILL;
1680		va = (vaddr_t)regs[inst.FQType.fr] +
1681		    (vaddr_t)regs[inst.FQType.ft];
1682		if ((va & 0x03) != 0) {
1683			sv->sival_ptr = (void *)va;
1684			return SIGBUS;
1685		}
1686		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1687			sv->sival_ptr = (void *)va;
1688			return SIGSEGV;
1689		}
1690		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fd] = wdata;
1691		break;
1692	case OP_SDXC1:
1693		if (inst.FQType.fd != 0)
1694			return SIGILL;
1695		va = (vaddr_t)regs[inst.FQType.fr] +
1696		    (vaddr_t)regs[inst.FQType.ft];
1697		if ((va & 0x07) != 0) {
1698			sv->sival_ptr = (void *)va;
1699			return SIGBUS;
1700		}
1701		if ((tf->sr & SR_FR_32) != 0 || (inst.FQType.fs & 1) == 0)
1702			ddata = fpu_load(p, tf, FMT_L, inst.FQType.fs);
1703		else {
1704			/* undefined behaviour, don't expose stack content */
1705			ddata = 0;
1706		}
1707		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1708			sv->sival_ptr = (void *)va;
1709			return SIGSEGV;
1710		}
1711		break;
1712	case OP_SWXC1:
1713		if (inst.FQType.fd != 0)
1714			return SIGILL;
1715		va = (vaddr_t)regs[inst.FQType.fr] +
1716		    (vaddr_t)regs[inst.FQType.ft];
1717		if ((va & 0x03) != 0) {
1718			sv->sival_ptr = (void *)va;
1719			return SIGBUS;
1720		}
1721		wdata = ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.FQType.fs];
1722		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1723			sv->sival_ptr = (void *)va;
1724			return SIGSEGV;
1725		}
1726		break;
1727	case OP_PREFX:
1728		/* nothing to do */
1729		break;
1730	}
1731
1732	return 0;
1733}
1734
1735/*
1736 * Emulate a load/store instruction on FPU registers.
1737 */
1738int
1739nofpu_emulate_loadstore(struct proc *p, struct trapframe *tf, uint32_t insn,
1740    union sigval *sv)
1741{
1742	register_t *regs = (register_t *)tf;
1743	InstFmt inst;
1744	vaddr_t va;
1745	uint64_t ddata;
1746	uint32_t wdata;
1747
1748	inst = *(InstFmt *)&insn;
1749	switch (inst.IType.op) {
1750	case OP_LDC1:
1751		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1752		if ((va & 0x07) != 0) {
1753			sv->sival_ptr = (void *)va;
1754			return SIGBUS;
1755		}
1756		if (copyin((const void *)va, &ddata, sizeof ddata) != 0) {
1757			sv->sival_ptr = (void *)va;
1758			return SIGSEGV;
1759		}
1760		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1761			fpu_store(p, tf, FMT_L, inst.IType.rt, ddata);
1762		break;
1763	case OP_LWC1:
1764		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1765		if ((va & 0x03) != 0) {
1766			sv->sival_ptr = (void *)va;
1767			return SIGBUS;
1768		}
1769		if (copyin((const void *)va, &wdata, sizeof wdata) != 0) {
1770			sv->sival_ptr = (void *)va;
1771			return SIGSEGV;
1772		}
1773		((uint64_t *)p->p_md.md_regs)[FPBASE + inst.IType.rt] = wdata;
1774		break;
1775	case OP_SDC1:
1776		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1777		if ((va & 0x07) != 0) {
1778			sv->sival_ptr = (void *)va;
1779			return SIGBUS;
1780		}
1781		if ((tf->sr & SR_FR_32) != 0 || (inst.IType.rt & 1) == 0)
1782			ddata = fpu_load(p, tf, FMT_L, inst.IType.rt);
1783		else {
1784			/* undefined behaviour, don't expose stack content */
1785			ddata = 0;
1786		}
1787		if (copyout(&ddata, (void *)va, sizeof ddata) != 0) {
1788			sv->sival_ptr = (void *)va;
1789			return SIGSEGV;
1790		}
1791		break;
1792	case OP_SWC1:
1793		va = (vaddr_t)regs[inst.IType.rs] + (int16_t)inst.IType.imm;
1794		if ((va & 0x03) != 0) {
1795			sv->sival_ptr = (void *)va;
1796			return SIGBUS;
1797		}
1798		wdata = ((uint64_t *)p->p_md.md_regs)[FPBASE + inst.IType.rt];
1799		if (copyout(&wdata, (void *)va, sizeof wdata) != 0) {
1800			sv->sival_ptr = (void *)va;
1801			return SIGSEGV;
1802		}
1803		break;
1804	}
1805
1806	return 0;
1807}
1808
1809/*
1810 * Emulate MOVF and MOVT.
1811 */
1812int
1813nofpu_emulate_movci(struct trapframe *tf, uint32_t insn)
1814{
1815	register_t *regs = (register_t *)tf;
1816	InstFmt inst;
1817	uint cc, istf;
1818	int condition;
1819
1820	inst = *(InstFmt *)&insn;
1821	if ((inst.RType.rt & 0x02) != 0 || inst.RType.shamt != 0)
1822		return SIGILL;
1823
1824	cc = inst.RType.rt >> 2;
1825	istf = inst.RType.rt & COPz_BC_TF_MASK;
1826	condition = tf->fsr & FPCSR_CONDVAL(cc);
1827	if ((!condition && !istf) /*movf*/ || (condition && istf) /*movt*/) {
1828		if (inst.RType.rd != ZERO)
1829			regs[inst.RType.rd] = regs[inst.RType.rs];
1830	}
1831
1832	return 0;
1833}
1834
1835#endif	/* FPUEMUL */
1836