fpu.c revision 124182
1/* 2 * Copyright (c) 1992, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This software was developed by the Computer Systems Engineering group 6 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and 7 * contributed to Berkeley. 8 * 9 * All advertising materials mentioning features or use of this software 10 * must display the following acknowledgement: 11 * This product includes software developed by the University of 12 * California, Lawrence Berkeley Laboratory. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 3. All advertising materials mentioning features or use of this software 23 * must display the following acknowledgement: 24 * This product includes software developed by the University of 25 * California, Berkeley and its contributors. 26 * 4. Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40 * SUCH DAMAGE. 41 */ 42/*- 43 * Copyright 2001 by Thomas Moestl <tmm@FreeBSD.org>. All rights reserved. 44 * 45 * Redistribution and use in source and binary forms, with or without 46 * modification, are permitted provided that the following conditions 47 * are met: 48 * 1. Redistributions of source code must retain the above copyright 49 * notice, this list of conditions and the following disclaimer. 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 58 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 59 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 60 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 61 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 63 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
64 * 65 * @(#)fpu.c 8.1 (Berkeley) 6/11/93 66 * $NetBSD: fpu.c,v 1.11 2000/12/06 01:47:50 mrg Exp $ 67 */ 68 69#include <sys/cdefs.h> 70__FBSDID("$FreeBSD: head/lib/libc/sparc64/fpu/fpu.c 124182 2004-01-06 18:53:26Z nectar $"); 71 72#include <sys/param.h> 73 74#include "namespace.h" 75#include <errno.h> 76#include <unistd.h> 77#include <signal.h> 78#include <stdlib.h> 79#include "un-namespace.h" 80#include "libc_private.h" 81 82#include <machine/fp.h> 83#include <machine/frame.h> 84#include <machine/fsr.h> 85#include <machine/instr.h> 86#include <machine/pcb.h> 87#include <machine/tstate.h> 88 89#include "__sparc_utrap_private.h" 90#include "fpu_emu.h" 91#include "fpu_extern.h" 92 93/* 94 * Translate current exceptions into `first' exception. The 95 * bits go the wrong way for ffs() (0x10 is most important, etc). 96 * There are only 5, so do it the obvious way. 97 */ 98#define X1(x) x 99#define X2(x) x,x 100#define X4(x) x,x,x,x 101#define X8(x) X4(x),X4(x) 102#define X16(x) X8(x),X8(x) 103 104static char cx_to_trapx[] = { 105 X1(FSR_NX), 106 X2(FSR_DZ), 107 X4(FSR_UF), 108 X8(FSR_OF), 109 X16(FSR_NV) 110}; 111 112#ifdef FPU_DEBUG 113#ifdef FPU_DEBUG_MASK 114int __fpe_debug = FPU_DEBUG_MASK; 115#else 116int __fpe_debug = 0; 117#endif 118#endif /* FPU_DEBUG */ 119 120static int __fpu_execute(struct utrapframe *, struct fpemu *, u_int32_t, u_long); 121 122/* 123 * Need to use an fpstate on the stack; we could switch, so we cannot safely 124 * modify the pcb one, it might get overwritten. 
125 */ 126int 127__fpu_exception(struct utrapframe *uf) 128{ 129 struct fpemu fe; 130 u_long fsr, tstate; 131 u_int insn; 132 int sig; 133 134 fsr = uf->uf_fsr; 135 136 switch (FSR_GET_FTT(fsr)) { 137 case FSR_FTT_NONE: 138 __utrap_write("lost FPU trap type\n"); 139 return (0); 140 case FSR_FTT_IEEE: 141 return (SIGFPE); 142 case FSR_FTT_SEQERR: 143 __utrap_write("FPU sequence error\n"); 144 return (SIGFPE); 145 case FSR_FTT_HWERR: 146 __utrap_write("FPU hardware error\n"); 147 return (SIGFPE); 148 case FSR_FTT_UNFIN: 149 case FSR_FTT_UNIMP: 150 break; 151 default: 152 __utrap_write("unknown FPU error\n"); 153 return (SIGFPE); 154 } 155 156 fe.fe_fsr = fsr & ~FSR_FTT_MASK; 157 insn = *(u_int32_t *)uf->uf_pc; 158 if (IF_OP(insn) != IOP_MISC || (IF_F3_OP3(insn) != INS2_FPop1 && 159 IF_F3_OP3(insn) != INS2_FPop2)) 160 __utrap_panic("bogus FP fault"); 161 tstate = uf->uf_state; 162 sig = __fpu_execute(uf, &fe, insn, tstate); 163 if (sig != 0) 164 return (sig); 165 __asm __volatile("ldx %0, %%fsr" : : "m" (fe.fe_fsr)); 166 return (0); 167} 168 169#ifdef FPU_DEBUG 170/* 171 * Dump a `fpn' structure. 172 */ 173void 174__fpu_dumpfpn(struct fpn *fp) 175{ 176 static char *class[] = { 177 "SNAN", "QNAN", "ZERO", "NUM", "INF" 178 }; 179 180 printf("%s %c.%x %x %x %xE%d", class[fp->fp_class + 2], 181 fp->fp_sign ? '-' : ' ', 182 fp->fp_mant[0], fp->fp_mant[1], 183 fp->fp_mant[2], fp->fp_mant[3], 184 fp->fp_exp); 185} 186#endif 187 188static int opmask[] = {0, 0, 1, 3}; 189 190/* Decode 5 bit register field depending on the type. */ 191#define RN_DECODE(tp, rn) \ 192 ((tp == FTYPE_DBL || tp == FTYPE_EXT ? INSFPdq_RN((rn)) : (rn)) & \ 193 ~opmask[tp]) 194 195/* Operand size in 32-bit registers. */ 196#define OPSZ(tp) ((tp) == FTYPE_LNG ? 2 : (1 << (tp))) 197 198/* 199 * Helper for forming the below case statements. Build only the op3 and opf 200 * field of the instruction, these are the only ones that need to match. 
201 */ 202#define FOP(op3, opf) \ 203 ((op3) << IF_F3_OP3_SHIFT | (opf) << IF_F3_OPF_SHIFT) 204 205/* 206 * Implement a move operation for all supported operand types. The additional 207 * nand and xor parameters will be applied to the upper 32 bit word of the 208 * source operand. This allows to implement fabs and fneg (for fp operands 209 * only!) using this functions, too, by passing (1 << 31) for one of the 210 * parameters, and 0 for the other. 211 */ 212static void 213__fpu_mov(struct fpemu *fe, int type, int rd, int rs2, u_int32_t nand, 214 u_int32_t xor) 215{ 216 u_int64_t tmp64; 217 int i; 218 219 if (type == FTYPE_INT || type == FTYPE_SNG) 220 __fpu_setreg(rd, (__fpu_getreg(rs2) & ~nand) ^ xor); 221 else { 222 /* 223 * Need to use the double versions to be able to access 224 * the upper 32 fp registers. 225 */ 226 for (i = 0; i < OPSZ(type); i += 2, rd += 2, rs2 += 2) { 227 tmp64 = __fpu_getreg64(rs2); 228 if (i == 0) 229 tmp64 = (tmp64 & ~((u_int64_t)nand << 32)) ^ 230 ((u_int64_t)xor << 32); 231 __fpu_setreg64(rd, tmp64); 232 } 233 } 234} 235 236static __inline void 237__fpu_ccmov(struct fpemu *fe, int type, int rd, int rs2, 238 u_int32_t insn, int fcc) 239{ 240 241 if (IF_F4_COND(insn) == fcc) 242 __fpu_mov(fe, type, rd, rs2, 0, 0); 243} 244 245static int 246__fpu_cmpck(struct fpemu *fe) 247{ 248 u_long fsr; 249 int cx; 250 251 /* 252 * The only possible exception here is NV; catch it 253 * early and get out, as there is no result register. 254 */ 255 cx = fe->fe_cx; 256 fsr = fe->fe_fsr | (cx << FSR_CEXC_SHIFT); 257 if (cx != 0) { 258 if (fsr & (FSR_NV << FSR_TEM_SHIFT)) { 259 fe->fe_fsr = (fsr & ~FSR_FTT_MASK) | 260 FSR_FTT(FSR_FTT_IEEE); 261 return (SIGFPE); 262 } 263 fsr |= FSR_NV << FSR_AEXC_SHIFT; 264 } 265 fe->fe_fsr = fsr; 266 return (0); 267} 268 269/* 270 * Execute an FPU instruction (one that runs entirely in the FPU; not 271 * FBfcc or STF, for instance). 
* On return, fe->fe_fsr will be
 * modified to reflect the setting the hardware would have left.
 *
 * Note that we do not catch all illegal opcodes, so you can, for instance,
 * multiply two integers this way.
 *
 * uf is used to fetch integer registers for the FMOVr forms, insn is the
 * instruction word and tstate supplies the integer condition codes for the
 * FMOVcc forms on %icc/%xcc.
 *
 * Returns 0 on success, SIGILL for opcodes that are not handled here and
 * SIGFPE when the operation raised an exception whose trap is enabled (the
 * destination register is left untouched in that case).
 */
static int
__fpu_execute(struct utrapframe *uf, struct fpemu *fe, u_int32_t insn,
    u_long tstate)
{
	struct fpn *fp;
	int opf, rs1, rs2, rd, type, mask, cx;
	u_long reg, fsr;
	u_int space[4];

	/*
	 * `Decode' and execute instruction. Start with no exceptions.
	 * The type of any opf opcode is in the bottom two bits, so we
	 * squish them out here.
	 */
	opf = insn & (IF_MASK(IF_F3_OP3_SHIFT, IF_F3_OP3_BITS) |
	    IF_MASK(IF_F3_OPF_SHIFT + 2, IF_F3_OPF_BITS - 2));
	type = IF_F3_OPF(insn) & 3;
	rs1 = RN_DECODE(type, IF_F3_RS1(insn));
	rs2 = RN_DECODE(type, IF_F3_RS2(insn));
	rd = RN_DECODE(type, IF_F3_RD(insn));
#ifdef notdef
	if ((rs1 | rs2 | rd) & opmask[type])
		return (SIGILL);
#endif
	/*
	 * NOTE(review): fsr is sampled before the current exception bits are
	 * cleared in fe->fe_fsr and is what gets stored back at the end, so
	 * previously set CEXC bits are carried over -- confirm this is
	 * intended.
	 */
	fsr = fe->fe_fsr;
	fe->fe_fsr &= ~FSR_CEXC_MASK;
	fe->fe_cx = 0;
	switch (opf) {
	/* Conditional moves on the floating point condition codes. */
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(0))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC0(fsr));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(1))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC1(fsr));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(2))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC2(fsr));
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_FCC(3))):
		__fpu_ccmov(fe, type, rd, rs2, insn, FSR_GET_FCC3(fsr));
		return (0);
	/* Conditional moves on the integer condition codes. */
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_ICC)):
		__fpu_ccmov(fe, type, rd, rs2, insn,
		    (tstate & TSTATE_ICC_MASK) >> TSTATE_ICC_SHIFT);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_CC(IFCC_XCC)):
		__fpu_ccmov(fe, type, rd, rs2, insn,
		    (tstate & TSTATE_XCC_MASK) >> (TSTATE_XCC_SHIFT));
		return (0);
	/*
	 * Conditional moves on the contents of an integer register.  The
	 * register is fetched as an unsigned value, so the ordered
	 * comparisons must be done on its signed interpretation; comparing
	 * the u_long directly made `< 0' never true and `>= 0' always true.
	 */
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_Z)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if (reg == 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LEZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if ((int64_t)reg <= 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_LZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if ((int64_t)reg < 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_NZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if (reg != 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if ((int64_t)reg > 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop2, INSFP2_FMOV_RC(IRCOND_GEZ)):
		reg = __emul_fetch_reg(uf, IF_F4_RS1(insn));
		if ((int64_t)reg >= 0)
			__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	/* Compares have no result register; __fpu_cmpck() finishes them. */
	case FOP(INS2_FPop2, INSFP2_FCMP):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		__fpu_compare(fe, 0, IF_F3_CC(insn));
		return (__fpu_cmpck(fe));
	case FOP(INS2_FPop2, INSFP2_FCMPE):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		__fpu_compare(fe, 1, IF_F3_CC(insn));
		return (__fpu_cmpck(fe));
	case FOP(INS2_FPop1, INSFP1_FMOV): /* these should all be pretty obvious */
		__fpu_mov(fe, type, rd, rs2, 0, 0);
		return (0);
	case FOP(INS2_FPop1, INSFP1_FNEG):
		/* Flip the sign bit; 1U avoids shifting into int's sign bit. */
		__fpu_mov(fe, type, rd, rs2, 0, (1U << 31));
		return (0);
	case FOP(INS2_FPop1, INSFP1_FABS):
		/* Clear the sign bit. */
		__fpu_mov(fe, type, rd, rs2, (1U << 31), 0);
		return (0);
	case FOP(INS2_FPop1, INSFP1_FSQRT):
		__fpu_explode(fe, &fe->fe_f1, type, rs2);
		fp = __fpu_sqrt(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FADD):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_add(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FSUB):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_sub(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FMUL):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_mul(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FDIV):
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		fp = __fpu_div(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FsMULd):
	case FOP(INS2_FPop1, INSFP1_FdMULq):
		if (type == FTYPE_EXT)
			return (SIGILL);
		__fpu_explode(fe, &fe->fe_f1, type, rs1);
		__fpu_explode(fe, &fe->fe_f2, type, rs2);
		type++;	/* single to double, or double to quad */
		/*
		 * Recalculate rd (the old type applied for the source regs
		 * only, the target one has a different size).
		 */
		rd = RN_DECODE(type, IF_F3_RD(insn));
		fp = __fpu_mul(fe);
		break;
	case FOP(INS2_FPop1, INSFP1_FxTOs):
	case FOP(INS2_FPop1, INSFP1_FxTOd):
	case FOP(INS2_FPop1, INSFP1_FxTOq):
		type = FTYPE_LNG;
		/*
		 * The 64-bit integer source lives in a double register, so
		 * rs2 has to be decoded like one; the decode at the top used
		 * the low opf bits, which do not describe the source here.
		 */
		rs2 = RN_DECODE(FTYPE_DBL, IF_F3_RS2(insn));
		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
		/* sneaky; depends on instruction encoding */
		type = (IF_F3_OPF(insn) >> 2) & 3;
		rd = RN_DECODE(type, IF_F3_RD(insn));
		break;
	case FOP(INS2_FPop1, INSFP1_FTOx):
		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
		type = FTYPE_LNG;
		/*
		 * The 64-bit result needs a double register; decode rd
		 * accordingly instead of merely clearing its low bit, which
		 * would lose the encoding of the upper registers.
		 */
		rd = RN_DECODE(FTYPE_DBL, IF_F3_RD(insn));
		break;
	case FOP(INS2_FPop1, INSFP1_FTOs):
	case FOP(INS2_FPop1, INSFP1_FTOd):
	case FOP(INS2_FPop1, INSFP1_FTOq):
	case FOP(INS2_FPop1, INSFP1_FTOi):
		__fpu_explode(fe, fp = &fe->fe_f1, type, rs2);
		/* sneaky; depends on instruction encoding */
		type = (IF_F3_OPF(insn) >> 2) & 3;
		rd = RN_DECODE(type, IF_F3_RD(insn));
		break;
	default:
		return (SIGILL);
	}

	/*
	 * ALU operation is complete. Collapse the result and then check
	 * for exceptions. If we got any, and they are enabled, do not
	 * alter the destination register, just stop with an exception.
	 * Otherwise set new current exceptions and accrue.
	 */
	__fpu_implode(fe, fp, type, space);
	cx = fe->fe_cx;
	if (cx != 0) {
		mask = (fsr >> FSR_TEM_SHIFT) & FSR_TEM_MASK;
		if (cx & mask) {
			/* not accrued??? */
			/*
			 * NOTE(review): this value only ends up in the local
			 * fsr and is not written back to fe->fe_fsr before
			 * returning.
			 */
			fsr = (fsr & ~FSR_FTT_MASK) |
			    FSR_FTT(FSR_FTT_IEEE) |
			    FSR_CEXC(cx_to_trapx[(cx & mask) - 1]);
			return (SIGFPE);
		}
		fsr |= (cx << FSR_CEXC_SHIFT) | (cx << FSR_AEXC_SHIFT);
	}
	fe->fe_fsr = fsr;
	/*
	 * Store the result: one 32-bit register for int/single, one 64-bit
	 * register for double/long and two for extended.  This is spelled
	 * out rather than looping on OPSZ() so that space[] (4 words) is
	 * never indexed out of bounds and no register beyond the
	 * destination operand is written.
	 */
	if (type == FTYPE_INT || type == FTYPE_SNG)
		__fpu_setreg(rd, space[0]);
	else {
		__fpu_setreg64(rd, ((u_int64_t)space[0] << 32) | space[1]);
		if (type == FTYPE_EXT)
			__fpu_setreg64(rd + 2,
			    ((u_int64_t)space[2] << 32) | space[3]);
	}
	return (0);	/* success */
}