/*
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*-
 * Copyright 2001 by Thomas Moestl <tmm@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *	@(#)fpu.c	8.1 (Berkeley) 6/11/93
 *	$NetBSD: fpu.c,v 1.11 2000/12/06 01:47:50 mrg Exp $
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/lib/libc/sparc64/fpu/fpu.c 204974 2010-03-10 19:55:48Z marius $");

#include <sys/param.h>

#include "namespace.h"
#include <errno.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include "un-namespace.h"
#include "libc_private.h"

#include <machine/fp.h>
#include <machine/frame.h>
#include <machine/fsr.h>
#include <machine/instr.h>
#include <machine/pcb.h>
#include <machine/tstate.h>

#include "__sparc_utrap_private.h"
#include "fpu_emu.h"
#include "fpu_extern.h"

/*
 * Translate current exceptions into `first' exception.  The
 * bits go the wrong way for ffs() (0x10 is most important, etc).
 * There are only 5, so do it the obvious way.
 */
#define	X1(x) x
#define	X2(x) x,x
#define	X4(x) x,x,x,x
#define	X8(x) X4(x),X4(x)
#define	X16(x) X8(x),X8(x)

static const char cx_to_trapx[] = {
	X1(FSR_NX),
	X2(FSR_DZ),
	X4(FSR_UF),
	X8(FSR_OF),
	X16(FSR_NV)
};
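
/*
 * The table is indexed with (cexc - 1), so for any non-zero set of current
 * exception bits the lookup yields the most significant (i.e. the most
 * important) one: e.g. cx = FSR_OF | FSR_NX gives index 8, which falls in
 * the X8(FSR_OF) region, so FSR_OF is reported first.
 */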

#ifdef FPU_DEBUG
#ifdef FPU_DEBUG_MASK
int __fpe_debug = FPU_DEBUG_MASK;
#else
int __fpe_debug = 0;
#endif
#endif	/* FPU_DEBUG */

static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t,
    u_long);

/*
 * Need to use an fpstate on the stack; we could switch, so we cannot safely
 * modify the pcb one, as it might get overwritten.
 */
int
__fpu_exception(struct utrapframe *uf)
{
	struct fpemu fe;
	u_long fsr, tstate;
	u_int insn;
	int sig;

	fsr = uf->uf_fsr;

	switch (FSR_GET_FTT(fsr)) {
	case FSR_FTT_NONE:
		__utrap_write("lost FPU trap type\n");
		return (0);
	case FSR_FTT_IEEE:
		return (SIGFPE);
	case FSR_FTT_SEQERR:
		__utrap_write("FPU sequence error\n");
		return (SIGFPE);
	case FSR_FTT_HWERR:
		__utrap_write("FPU hardware error\n");
		return (SIGFPE);
	case FSR_FTT_UNFIN:
	case FSR_FTT_UNIMP:
		break;
	default:
		__utrap_write("unknown FPU error\n");
		return (SIGFPE);
	}

	fe.fe_fsr = fsr & ~FSR_FTT_MASK;
	insn = *(u_int32_t *)uf->uf_pc;
	if (IF_OP(insn) != IOP_MISC || (IF_F3_OP3(insn) != INS2_FPop1 &&
	    IF_F3_OP3(insn) != INS2_FPop2))
		__utrap_panic("bogus FP fault");
	tstate = uf->uf_state;
	sig = __fpu_execute(uf, &fe, insn, tstate);
	if (sig != 0)
		return (sig);
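	/*
	 * Emulation succeeded; reload the hardware %fsr from the updated
	 * software copy so that the current and accrued exception bits
	 * become visible to the application.
	 */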
	__asm __volatile("ldx %0, %%fsr" : : "m" (fe.fe_fsr));
	return (0);
}

#ifdef FPU_DEBUG
/*
 * Dump a `fpn' structure.
 */
void
__fpu_dumpfpn(struct fpn *fp)
{
	static const char *const class[] = {
		"SNAN", "QNAN", "ZERO", "NUM", "INF"
	};

	printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2],
		fp->fp_sign ? '-' : ' ',
		fp->fp_mant[0],	fp->fp_mant[1],
		fp->fp_mant[2], fp->fp_mant[3],
		fp->fp_exp);
}
#endif

static const int opmask[] = {0, 0, 1, 3, 1};

/* Decode 5 bit register field depending on the type. */
#define	RN_DECODE(tp, rn) \
	((tp) >= FTYPE_DBL ? INSFPdq_RN(rn) & ~opmask[tp] : (rn))
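
/*
 * For double, quad and long operands the V9 encoding stores bit 5 of the
 * register number in the low bit of the 5-bit field (making %f32-%f62
 * reachable); INSFPdq_RN() is assumed to undo that swizzle, while opmask[]
 * clears the low bit(s) to enforce even/quad register alignment.  E.g. a
 * double-register field of 0b00011 names %f34.
 */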

/*
 * Helper for forming the case statements below.  Build only the op3 and opf
 * fields of the instruction; these are the only ones that need to match.
 */
#define	FOP(op3, opf) \
	((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT)
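
/*
 * E.g. FOP(INS2_FPop1, INSFP1_FADD) matches FADDs, FADDd and FADDq alike,
 * since __fpu_execute() masks the two low opf bits (the operand size) out
 * of the opcode before switching on it.
 */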

/*
 * Implement a move operation for all supported operand types.  The additional
 * nand and xor parameters will be applied to the upper 32-bit word of the
 * source operand.  This allows fabs and fneg (for fp operands only!) to be
 * implemented using this function, too, by passing (1 << 31) for one of the
 * parameters, and 0 for the other.
 */
static void
__fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand,
    u_int32_t xor)
{

	if (type == FTYPE_INT || type == FTYPE_SNG)
		__fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor);
	else {
		/*
		 * Need to use the double versions to be able to access
		 * the upper 32 fp registers.
		 */
		__fpu_setreg64(rd, (__fpu_getreg64(rs2) &
		    ~((u_int64_t)nand << 32)) ^ ((u_int64_t)xor << 32));
		if (type == FTYPE_EXT)
			__fpu_setreg64(rd + 2, __fpu_getreg64(rs2 + 2));
	}
}
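
/*
 * Note that for all operand formats the sign occupies the most significant
 * bit of the first 32-bit word, so clearing or flipping bit 31 of the upper
 * word is all FABS and FNEG need; __fpu_execute() passes (1 << 31) as the
 * nand argument for FABS and as the xor argument for FNEG.
 */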

static __inline void
__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2,
    u_int32_t insn, int fcc)
{

	if (IF_F4_COND(insn) == fcc)
		__fpu_mov(fe, type, rd, rs2, 0, 0);
}

static int
__fpu_cmpck(struct fpemu *fe)
{
	u_long fsr;
	int cx;

	/*
	 * The only possible exception here is NV; catch it
	 * early and get out, as there is no result register.
	 */
	cx = fe->fe_cx;
	fsr = fe->fe_fsr | (cx << FSR_CEXC_SHIFT);
	if (cx != 0) {
		if (fsr & (FSR_NV << FSR_TEM_SHIFT)) {
			fe->fe_fsr = (fsr & ~FSR_FTT_MASK) |
			    FSR_FTT(FSR_FTT_IEEE);
			return (SIGFPE);
		}
		fsr |= FSR_NV << FSR_AEXC_SHIFT;
	}
	fe->fe_fsr = fsr;
	return (0);
}
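
/*
 * The fcc bits themselves are expected to have been set by __fpu_compare();
 * this helper only folds the resulting exception state into the FSR and
 * decides whether an IEEE trap (SIGFPE) has to be delivered.
 */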

/*
 * Execute an FPU instruction (one that runs entirely in the FPU; not
 * FBfcc or STF, for instance).  On return, fe->fe_fsr will be
 * modified to reflect the setting the hardware would have left.
 *
 * Note that we do not catch all illegal opcodes, so you can, for instance,
 * multiply two integers this way.
 */
static int
__fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn,
    u_long tstate)
{
	struct fpn *fp;
	int opf, rs1, rs2, rd, type, mask, cx, cond;
	u_long reg, fsr;
	u_int space[4];

	/*
	 * `Decode' and execute instruction.  Start with no exceptions.
	 * The type of almost any OPF opcode is in the bottom two bits, so we
	 * squish them out here.
	 */
	opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
	    IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
	type = IF_F3_OPF(insn) & 3;
	rs1 = RN_DECODE(type, IF_F3_RS1(insn));
	rs2 = RN_DECODE(type, IF_F3_RS2(insn));
	rd = RN_DECODE(type, IF_F3_RD(insn));
	cond = 0;
#ifdef notdef
	if ((rs1 | rs2 | rd) & opmask[type])
		return (SIGILL);
#endif
	fsr = fe->fe_fsr;
	fe->fe_fsr &= ~FSR_CEXC_MASK;
	fe->fe_cx = 0;
	switch (opf) {
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
		__fpu_ccmov(fe, type, rd, rs2, insn,
		    (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
		__fpu_ccmov(fe, type, rd, rs2, insn,
		    (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if (reg == 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		/* The FMOVRcc conditions are signed comparisons. */
		if ((long)reg <= 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if ((long)reg < 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if (reg != 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if ((long)reg > 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if ((long)reg >= 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FCMP):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		__fpu_compare(fe, 0, IF_F3_CC(insn));
		return (__fpu_cmpck(fe));
	case FOP(INS2_FPop2, INSFP2_FCMPE):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		__fpu_compare(fe, 1, IF_F3_CC(insn));
		return (__fpu_cmpck(fe));
	case FOP(INS2_FPop1, INSFP1_FMOV):
		__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop1, INSFP1_FNEG):
		__fpu_mov(fe, type, rd, rs2, 0, (1 << 31));
		return (0);
	case FOP(INS2_FPop1, INSFP1_FABS):
		__fpu_mov(fe, type, rd, rs2, (1 << 31), 0);
		return (0);
	case FOP(INS2_FPop1, INSFP1_FSQRT):
		__fpu_explode(fe, &fe->fe_f1, type, rs2);
		fp = __fpu_sqrt(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FADD):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_add(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FSUB):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_sub(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FMUL):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_mul(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FDIV):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_div(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FsMULd):
	case FOP(INS2_FPop1, INSFP1_FdMULq):
		if (type == FTYPE_EXT)
			return (SIGILL);
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		type++;	/* single to double, or double to quad */
		/*
		 * Recalculate rd (the old type applied to the source regs
		 * only; the destination has a different size).
		 */
		rd = RN_DECODE(type, IF_F3_RD(insn));
		fp = __fpu_mul(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FxTOs):
	case FOP(INS2_FPop1, INSFP1_FxTOd):
	case FOP(INS2_FPop1, INSFP1_FxTOq):
		type = FTYPE_LNG;
		rs2 = RN_DECODE(type, IF_F3_RS2(insn));
		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
		/* sneaky; the result format is encoded in bits 3:2 of opf */
		type = (IF_F3_OPF(insn) >> 2) & 3;
		rd = RN_DECODE(type, IF_F3_RD(insn));
		break;
	case FOP(INS2_FPop1, INSFP1_FTOx):
		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
		type = FTYPE_LNG;
		rd = RN_DECODE(type, IF_F3_RD(insn));
		break;
	case FOP(INS2_FPop1, INSFP1_FTOs):
	case FOP(INS2_FPop1, INSFP1_FTOd):
	case FOP(INS2_FPop1, INSFP1_FTOq):
	case FOP(INS2_FPop1, INSFP1_FTOi):
		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
		/* sneaky; the result format is encoded in bits 3:2 of opf */
		type = (IF_F3_OPF(insn) >> 2) & 3;
		rd = RN_DECODE(type, IF_F3_RD(insn));
		break;
	default:
		return (SIGILL);
	}

	/*
	 * ALU operation is complete.  Collapse the result and then check
	 * for exceptions.  If we got any, and they are enabled, do not
	 * alter the destination register, just stop with an exception.
	 * Otherwise set new current exceptions and accrue.
	 */
	__fpu_implode(fe, fp, type, space);
	cx = fe->fe_cx;
	if (cx != 0) {
		mask = (fsr >> FSR_TEM_SHIFT) & FSR_TEM_MASK;
		if (cx & mask) {
			/* not accrued??? */
			fsr = (fsr & ~FSR_FTT_MASK) |
			    FSR_FTT(FSR_FTT_IEEE) |
			    FSR_CEXC(cx_to_trapx[(cx & mask) - 1]);
			return (SIGFPE);
		}
		fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
	}
	fe->fe_fsr = fsr;
	if (type == FTYPE_INT || type == FTYPE_SNG)
		__fpu_setreg(rd, space[0]);
	else {
		__fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]);
		if (type == FTYPE_EXT)
			__fpu_setreg64(rd + 2,
			    ((u_int64_t)space[2] << 32) | space[3]);
	}
	return (0);	/* success */
}
459