fpu.c revision 124182
1/*
2 * Copyright (c) 1992, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This software was developed by the Computer Systems Engineering group
6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7 * contributed to Berkeley.
8 *
9 * All advertising materials mentioning features or use of this software
10 * must display the following acknowledgement:
11 *	This product includes software developed by the University of
12 *	California, Lawrence Berkeley Laboratory.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 * 2. Redistributions in binary form must reproduce the above copyright
20 *    notice, this list of conditions and the following disclaimer in the
21 *    documentation and/or other materials provided with the distribution.
22 * 3. All advertising materials mentioning features or use of this software
23 *    must display the following acknowledgement:
24 *	This product includes software developed by the University of
25 *	California, Berkeley and its contributors.
26 * 4. Neither the name of the University nor the names of its contributors
27 *    may be used to endorse or promote products derived from this software
28 *    without specific prior written permission.
29 *
30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
33 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
40 * SUCH DAMAGE.
41 */
42/*-
43 * Copyright 2001 by Thomas Moestl <tmm@FreeBSD.org>.  All rights reserved.
44 *
45 * Redistribution and use in source and binary forms, with or without
46 * modification, are permitted provided that the following conditions
47 * are met:
48 * 1. Redistributions of source code must retain the above copyright
49 *    notice, this list of conditions and the following disclaimer.
50 * 2. Redistributions in binary form must reproduce the above copyright
51 *    notice, this list of conditions and the following disclaimer in the
52 *    documentation and/or other materials provided with the distribution.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
57 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
58 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
61 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
63 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
64 *
65 *	@(#)fpu.c	8.1 (Berkeley) 6/11/93
66 *	$NetBSD: fpu.c,v 1.11 2000/12/06 01:47:50 mrg Exp $
67 */
68
69#include <sys/cdefs.h>
70__FBSDID("$FreeBSD: head/lib/libc/sparc64/fpu/fpu.c 124182 2004-01-06 18:53:26Z nectar $");
71
72#include <sys/param.h>
73
74#include "namespace.h"
75#include <errno.h>
76#include <unistd.h>
77#include <signal.h>
78#include <stdlib.h>
79#include "un-namespace.h"
80#include "libc_private.h"
81
82#include <machine/fp.h>
83#include <machine/frame.h>
84#include <machine/fsr.h>
85#include <machine/instr.h>
86#include <machine/pcb.h>
87#include <machine/tstate.h>
88
89#include "__sparc_utrap_private.h"
90#include "fpu_emu.h"
91#include "fpu_extern.h"
92
93/*
94 * Translate current exceptions into `first' exception.  The
95 * bits go the wrong way for ffs() (0x10 is most important, etc).
96 * There are only 5, so do it the obvious way.
97 */
98#define	X1(x) x
99#define	X2(x) x,x
100#define	X4(x) x,x,x,x
101#define	X8(x) X4(x),X4(x)
102#define	X16(x) X8(x),X8(x)
103
104static char cx_to_trapx[] = {
105	X1(FSR_NX),
106	X2(FSR_DZ),
107	X4(FSR_UF),
108	X8(FSR_OF),
109	X16(FSR_NV)
110};
111
#ifdef FPU_DEBUG
/*
 * Debug flag word, only present in FPU_DEBUG builds.  It starts out as
 * FPU_DEBUG_MASK when that macro is defined and as 0 otherwise.
 */
#ifndef FPU_DEBUG_MASK
int __fpe_debug = 0;
#else
int __fpe_debug = FPU_DEBUG_MASK;
#endif
#endif	/* FPU_DEBUG */
119
120static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t, u_long);
121
122/*
123 * Need to use an fpstate on the stack; we could switch, so we cannot safely
124 * modify the pcb one, it might get overwritten.
125 */
126int
127__fpu_exception(struct utrapframe *uf)
128{
129	struct fpemu fe;
130	u_long fsr, tstate;
131	u_int insn;
132	int sig;
133
134	fsr = uf->uf_fsr;
135
136	switch (FSR_GET_FTT(fsr)) {
137	case FSR_FTT_NONE:
138		__utrap_write("lost FPU trap type\n");
139		return (0);
140	case FSR_FTT_IEEE:
141		return (SIGFPE);
142	case FSR_FTT_SEQERR:
143		__utrap_write("FPU sequence error\n");
144		return (SIGFPE);
145	case FSR_FTT_HWERR:
146		__utrap_write("FPU hardware error\n");
147		return (SIGFPE);
148	case FSR_FTT_UNFIN:
149	case FSR_FTT_UNIMP:
150		break;
151	default:
152		__utrap_write("unknown FPU error\n");
153		return (SIGFPE);
154	}
155
156	fe.fe_fsr = fsr & ~FSR_FTT_MASK;
157	insn = *(u_int32_t *)uf->uf_pc;
158	if (IF_OP(insn) != IOP_MISC || (IF_F3_OP3(insn) != INS2_FPop1 &&
159	    IF_F3_OP3(insn) != INS2_FPop2))
160		__utrap_panic("bogus FP fault");
161	tstate = uf->uf_state;
162	sig = __fpu_execute(uf, &fe, insn, tstate);
163	if (sig != 0)
164		return (sig);
165	__asm __volatile("ldx %0, %%fsr" : : "m" (fe.fe_fsr));
166	return (0);
167}
168
169#ifdef FPU_DEBUG
170/*
171 * Dump a `fpn' structure.
172 */
173void
174__fpu_dumpfpn(struct fpn *fp)
175{
176	static char *class[] = {
177		"SNAN", "QNAN", "ZERO", "NUM", "INF"
178	};
179
180	printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
181		fp->fp_sign ? '-' : ' ',
182		fp->fp_mant[0],	fp->fp_mant[1],
183		fp->fp_mant[2], fp->fp_mant[3],
184		fp->fp_exp);
185}
186#endif
187
/*
 * Register-number alignment masks, indexed by operand type code: the low
 * register-number bits that RN_DECODE() clears for that type.  Only four
 * type codes are covered, so the table must not be indexed with any
 * other value.
 */
static const int opmask[] = {0, 0, 1, 3};

/*
 * Decode 5 bit register field depending on the type.  Double and extended
 * operands go through INSFPdq_RN(); the result is then aligned with
 * opmask[].  Both arguments are fully parenthesized so that expression
 * arguments expand correctly; `tp' is evaluated more than once.
 */
#define	RN_DECODE(tp, rn) \
	((((tp) == FTYPE_DBL || (tp) == FTYPE_EXT) ? \
	    INSFPdq_RN((rn)) : (rn)) & ~opmask[(tp)])

/* Operand size in 32-bit registers. */
#define	OPSZ(tp)	((tp) == FTYPE_LNG ? 2 : (1 << (tp)))
197
/*
 * Build the constant used as a case label when dispatching on an
 * instruction.  Only the op3 and opf fields are placed into the value,
 * each shifted to its position in the instruction word, because these
 * are the only fields that need to match.
 */
#define	FOP(op3, opf) \
	(((op3) << IF_F3_OP3_SHIFT) | ((opf) << IF_F3_OPF_SHIFT))
204
205/*
206 * Implement a move operation for all supported operand types. The additional
207 * nand and xor parameters will be applied to the upper 32 bit word of the
208 * source operand. This allows to implement fabs and fneg (for fp operands
209 * only!) using this functions, too, by passing (1 << 31) for one of the
210 * parameters, and 0 for the other.
211 */
212static void
213__fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand,
214    u_int32_t xor)
215{
216	u_int64_t tmp64;
217	int i;
218
219	if (type == FTYPE_INT || type == FTYPE_SNG)
220		__fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor);
221	else {
222		/*
223		 * Need to use the double versions to be able to access
224		 * the upper 32 fp registers.
225		 */
226		for (i = 0; i < OPSZ(type); i += 2, rd += 2, rs2 += 2) {
227			tmp64 = __fpu_getreg64(rs2);
228			if (i == 0)
229				tmp64 = (tmp64 & ~((u_int64_t)nand << 32)) ^
230				    ((u_int64_t)xor << 32);
231			__fpu_setreg64(rd, tmp64);
232		}
233	}
234}
235
236static __inline void
237__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2,
238    u_int32_t insn, int fcc)
239{
240
241	if (IF_F4_COND(insn) == fcc)
242		__fpu_mov(fe, type, rd, rs2, 0, 0);
243}
244
245static int
246__fpu_cmpck(struct fpemu *fe)
247{
248	u_long fsr;
249	int cx;
250
251	/*
252	 * The only possible exception here is NV; catch it
253	 * early and get out, as there is no result register.
254	 */
255	cx = fe->fe_cx;
256	fsr = fe->fe_fsr | (cx << FSR_CEXC_SHIFT);
257	if (cx != 0) {
258		if (fsr & (FSR_NV << FSR_TEM_SHIFT)) {
259			fe->fe_fsr = (fsr & ~FSR_FTT_MASK) |
260			    FSR_FTT(FSR_FTT_IEEE);
261			return (SIGFPE);
262		}
263		fsr |= FSR_NV << FSR_AEXC_SHIFT;
264	}
265	fe->fe_fsr = fsr;
266	return (0);
267}
268
269/*
270 * Execute an FPU instruction (one that runs entirely in the FPU; not
271 * FBfcc or STF, for instance).  On return, fe->fe_fs->fs_fsr will be
272 * modified to reflect the setting the hardware would have left.
273 *
274 * Note that we do not catch all illegal opcodes, so you can, for instance,
275 * multiply two integers this way.
276 */
277static int
278__fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn, u_long tstate)
279{
280	struct fpn *fp;
281	int opf, rs1, rs2, rd, type, mask, cx, cond;
282	u_long reg, fsr;
283	u_int space[4];
284	int i;
285
286	/*
287	 * `Decode' and execute instruction.  Start with no exceptions.
288	 * The type of any opf opcode is in the bottom two bits, so we
289	 * squish them out here.
290	 */
291	opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
292	    IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
293	type = IF_F3_OPF(insn) & 3;
294	rs1 = RN_DECODE(type, IF_F3_RS1(insn));
295	rs2 = RN_DECODE(type, IF_F3_RS2(insn));
296	rd = RN_DECODE(type, IF_F3_RD(insn));
297	cond = 0;
298#ifdef notdef
299	if ((rs1 | rs2 | rd) & opmask[type])
300		return (SIGILL);
301#endif
302	fsr = fe->fe_fsr;
303	fe->fe_fsr &= ~FSR_CEXC_MASK;
304	fe->fe_cx = 0;
305	switch (opf) {
306	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
307		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr));
308		return (0);
309	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
310		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr));
311		return (0);
312	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
313		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr));
314		return (0);
315	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
316		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr));
317		return (0);
318	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
319		__fpu_ccmov(fe, type, rd, rs2, insn,
320		    (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
321		return (0);
322	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
323		__fpu_ccmov(fe, type, rd, rs2, insn,
324		    (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
325		return (0);
326	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
327		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
328		if (reg == 0)
329			__fpu_mov(fe, type, rd, rs2, 0, 0);
330		return (0);
331	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
332		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
333		if (reg <= 0)
334			__fpu_mov(fe, type, rd, rs2, 0, 0);
335		return (0);
336	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
337		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
338		if (reg < 0)
339			__fpu_mov(fe, type, rd, rs2, 0, 0);
340		return (0);
341	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
342		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
343		if (reg != 0)
344			__fpu_mov(fe, type, rd, rs2, 0, 0);
345		return (0);
346	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
347		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
348		if (reg > 0)
349			__fpu_mov(fe, type, rd, rs2, 0, 0);
350		return (0);
351	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
352		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
353		if (reg >= 0)
354			__fpu_mov(fe, type, rd, rs2, 0, 0);
355		return (0);
356	case FOP(INS2_FPop2, INSFP2_FCMP):
357		__fpu_explode(fe, &fe->fe_f1, type, rs1);
358		__fpu_explode(fe, &fe->fe_f2, type, rs2);
359		__fpu_compare(fe, 0, IF_F3_CC(insn));
360		return (__fpu_cmpck(fe));
361	case FOP(INS2_FPop2, INSFP2_FCMPE):
362		__fpu_explode(fe, &fe->fe_f1, type, rs1);
363		__fpu_explode(fe, &fe->fe_f2, type, rs2);
364		__fpu_compare(fe, 1, IF_F3_CC(insn));
365		return (__fpu_cmpck(fe));
366	case FOP(INS2_FPop1, INSFP1_FMOV):	/* these should all be pretty obvious */
367		__fpu_mov(fe, type, rd, rs2, 0, 0);
368		return (0);
369	case FOP(INS2_FPop1, INSFP1_FNEG):
370		__fpu_mov(fe, type, rd, rs2, 0, (1 << 31));
371		return (0);
372	case FOP(INS2_FPop1, INSFP1_FABS):
373		__fpu_mov(fe, type, rd, rs2, (1 << 31), 0);
374		return (0);
375	case FOP(INS2_FPop1, INSFP1_FSQRT):
376		__fpu_explode(fe, &fe->fe_f1, type, rs2);
377		fp = __fpu_sqrt(fe);
378		break;
379	case FOP(INS2_FPop1, INSFP1_FADD):
380		__fpu_explode(fe, &fe->fe_f1, type, rs1);
381		__fpu_explode(fe, &fe->fe_f2, type, rs2);
382		fp = __fpu_add(fe);
383		break;
384	case FOP(INS2_FPop1, INSFP1_FSUB):
385		__fpu_explode(fe, &fe->fe_f1, type, rs1);
386		__fpu_explode(fe, &fe->fe_f2, type, rs2);
387		fp = __fpu_sub(fe);
388		break;
389	case FOP(INS2_FPop1, INSFP1_FMUL):
390		__fpu_explode(fe, &fe->fe_f1, type, rs1);
391		__fpu_explode(fe, &fe->fe_f2, type, rs2);
392		fp = __fpu_mul(fe);
393		break;
394	case FOP(INS2_FPop1, INSFP1_FDIV):
395		__fpu_explode(fe, &fe->fe_f1, type, rs1);
396		__fpu_explode(fe, &fe->fe_f2, type, rs2);
397		fp = __fpu_div(fe);
398		break;
399	case FOP(INS2_FPop1, INSFP1_FsMULd):
400	case FOP(INS2_FPop1, INSFP1_FdMULq):
401		if (type == FTYPE_EXT)
402			return (SIGILL);
403		__fpu_explode(fe, &fe->fe_f1, type, rs1);
404		__fpu_explode(fe, &fe->fe_f2, type, rs2);
405		type++;	/* single to double, or double to quad */
406		/*
407		 * Recalculate rd (the old type applied for the source regs
408		 * only, the target one has a different size).
409		 */
410		rd = RN_DECODE(type, IF_F3_RD(insn));
411		fp = __fpu_mul(fe);
412		break;
413	case FOP(INS2_FPop1, INSFP1_FxTOs):
414	case FOP(INS2_FPop1, INSFP1_FxTOd):
415	case FOP(INS2_FPop1, INSFP1_FxTOq):
416		type = FTYPE_LNG;
417		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
418		/* sneaky; depends on instruction encoding */
419		type = (IF_F3_OPF(insn) >> 2) & 3;
420		rd = RN_DECODE(type, IF_F3_RD(insn));
421		break;
422	case FOP(INS2_FPop1, INSFP1_FTOx):
423		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
424		type = FTYPE_LNG;
425		mask = 1;	/* needs 2 registers */
426		rd = IF_F3_RD(insn) & ~mask;
427		break;
428	case FOP(INS2_FPop1, INSFP1_FTOs):
429	case FOP(INS2_FPop1, INSFP1_FTOd):
430	case FOP(INS2_FPop1, INSFP1_FTOq):
431	case FOP(INS2_FPop1, INSFP1_FTOi):
432		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
433		/* sneaky; depends on instruction encoding */
434		type = (IF_F3_OPF(insn) >> 2) & 3;
435		rd = RN_DECODE(type, IF_F3_RD(insn));
436		break;
437	default:
438		return (SIGILL);
439	}
440
441	/*
442	 * ALU operation is complete.  Collapse the result and then check
443	 * for exceptions.  If we got any, and they are enabled, do not
444	 * alter the destination register, just stop with an exception.
445	 * Otherwise set new current exceptions and accrue.
446	 */
447	__fpu_implode(fe, fp, type, space);
448	cx = fe->fe_cx;
449	if (cx != 0) {
450		mask = (fsr >> FSR_TEM_SHIFT) & FSR_TEM_MASK;
451		if (cx & mask) {
452			/* not accrued??? */
453			fsr = (fsr & ~FSR_FTT_MASK) |
454			    FSR_FTT(FSR_FTT_IEEE) |
455			    FSR_CEXC(cx_to_trapx[(cx & mask) - 1]);
456			return (SIGFPE);
457		}
458		fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
459	}
460	fe->fe_fsr = fsr;
461	if (type == FTYPE_INT || type == FTYPE_SNG)
462		__fpu_setreg(rd, space[0]);
463	else {
464		for (i = 0; i < OPSZ(type); i += 2) {
465			__fpu_setreg64(rd + i, ((u_int64_t)space[i] << 32) |
466			    space[i + 1]);
467		}
468	}
469	return (0);	/* success */
470}
471