1/* $OpenBSD: m88110_fp.c,v 1.14 2023/01/31 15:18:54 deraadt Exp $ */ 2 3/* 4 * Copyright (c) 2007, Miodrag Vallat. 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice, this permission notice, and the disclaimer below 9 * appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20#include <sys/param.h> 21#include <sys/proc.h> 22#include <sys/signalvar.h> 23#include <sys/systm.h> 24 25#include <machine/fpu.h> 26#include <machine/frame.h> 27#include <machine/ieeefp.h> 28#include <machine/trap.h> 29#include <machine/m88110.h> 30 31#include <lib/libkern/softfloat.h> 32 33#include <m88k/m88k/fpu.h> 34 35int m88110_fpu_emulate(struct trapframe *, u_int32_t); 36void m88110_fpu_fetch(struct trapframe *, u_int, u_int, u_int, fparg *); 37 38/* 39 * All 88110 floating-point exceptions are handled there. 40 * 41 * We can unfortunately not trust the floating-point exception cause 42 * register, as the 88110 will conveniently only set the ``unimplemented 43 * instruction'' bit, more often than not. 44 * 45 * So we ignore it completely, and try to emulate the faulting instruction. 46 * The instruction can be: 47 * 48 * - an invalid SFU1 opcode, in which case we'll send SIGILL to the process. 49 * 50 * - a genuinely unimplemented feature: fsqrt. 51 * 52 * - an opcode involving an odd-numbered register pair (as a double precision 53 * operand). Rather than issuing a correctly formed flavour in kernel mode, 54 * and having to handle a possible nested exception, we emulate it. This 55 * will of course be slower, but we have to draw the line somewhere. 56 * Gcc will however never produce such code, so we don't have to worry 57 * too much about this under OpenBSD. 58 * 59 * Note that, currently, opcodes involving the extended register file (XRF) 60 * are handled as invalid opcodes. This will eventually change once the 61 * toolchain can correctly assemble XRF instructions, and the XRF is saved 62 * across context switches (or not... lazy switching for XRF makes more 63 * sense). 64 */ 65 66void 67m88110_fpu_exception(struct trapframe *frame) 68{ 69 struct proc *p = curproc; 70 int fault_type; 71 vaddr_t fault_addr; 72 union sigval sv; 73 u_int32_t insn; 74 int sig; 75 76 fault_addr = frame->tf_exip & XIP_ADDR; 77 78 /* 79 * Skip the instruction now. Signals will blame the correct 80 * address, and this has to be done before trapsignal() is 81 * invoked, or we won't run the first instruction of the signal 82 * handler... 83 */ 84 m88110_skip_insn(frame); 85 86 /* 87 * The low-level exception code did not save the floating point 88 * exception registers. Do it now, and reset the exception 89 * cause register. 90 */ 91 __asm__ volatile ("fldcr %0, %%fcr0" : "=r"(frame->tf_fpecr)); 92 __asm__ volatile ("fldcr %0, %%fcr62" : "=r"(frame->tf_fpsr)); 93 __asm__ volatile ("fldcr %0, %%fcr63" : "=r"(frame->tf_fpcr)); 94 __asm__ volatile ("fstcr %r0, %fcr0"); 95 96 /* 97 * Fetch the faulting instruction. This should not fail, if it 98 * does, it's probably not your lucky day. 99 */ 100 if (copyinsn(p, (u_int32_t *)fault_addr, (u_int32_t *)&insn) != 0) { 101 sig = SIGBUS; 102 fault_type = BUS_OBJERR; 103 goto deliver; 104 } 105 106 switch (insn >> 26) { 107 case 0x20: 108 /* 109 * f{ld,st,x}cr instruction. If it caused a fault in 110 * user mode, this is a privilege violation. 111 */ 112 sig = SIGILL; 113 fault_type = ILL_PRVREG; 114 goto deliver; 115 case 0x21: 116 /* 117 * ``real'' FPU instruction. We'll try to emulate it, 118 * unless FPU is disabled. 119 */ 120 if (frame->tf_epsr & PSR_SFD1) { /* don't bother */ 121 sig = SIGFPE; 122 fault_type = FPE_FLTINV; 123 goto deliver; 124 } 125 sig = m88110_fpu_emulate(frame, insn); 126 fault_type = SI_NOINFO; 127 /* 128 * Update the floating point status register regardless of 129 * whether we'll deliver a signal or not. 130 */ 131 __asm__ volatile ("fstcr %0, %%fcr62" :: "r"(frame->tf_fpsr)); 132 break; 133 default: 134 /* 135 * Not a FPU instruction. Should not have raised this 136 * exception, so bail out. 137 */ 138 sig = SIGILL; 139 fault_type = ILL_ILLOPC; 140 goto deliver; 141 } 142 143 if (sig != 0) { 144 if (sig == SIGILL) 145 fault_type = ILL_ILLOPC; 146 else { 147 if (frame->tf_fpecr & FPECR_FIOV) 148 fault_type = FPE_FLTSUB; 149 else if (frame->tf_fpecr & FPECR_FROP) 150 fault_type = FPE_FLTINV; 151 else if (frame->tf_fpecr & FPECR_FDVZ) 152 fault_type = FPE_INTDIV; 153 else if (frame->tf_fpecr & FPECR_FUNF) { 154 if (frame->tf_fpsr & FPSR_EFUNF) 155 fault_type = FPE_FLTUND; 156 else if (frame->tf_fpsr & FPSR_EFINX) 157 fault_type = FPE_FLTRES; 158 } else if (frame->tf_fpecr & FPECR_FOVF) { 159 if (frame->tf_fpsr & FPSR_EFOVF) 160 fault_type = FPE_FLTOVF; 161 else if (frame->tf_fpsr & FPSR_EFINX) 162 fault_type = FPE_FLTRES; 163 } else if (frame->tf_fpecr & FPECR_FINX) 164 fault_type = FPE_FLTRES; 165 } 166 167deliver: 168 sv.sival_ptr = (void *)fault_addr; 169 trapsignal(p, sig, 0, fault_type, sv); 170 } 171} 172 173/* 174 * Load a floating-point argument into a fparg union, then convert it to 175 * the required format if it is of larger precision. 176 * 177 * This assumes the final format (width) is not FTYPE_INT, and the original 178 * format (orig_width) <= width. 179 */ 180void 181m88110_fpu_fetch(struct trapframe *frame, u_int regno, u_int orig_width, 182 u_int width, fparg *dest) 183{ 184 u_int32_t tmp; 185 186 switch (orig_width) { 187 case FTYPE_INT: 188 tmp = regno == 0 ? 0 : frame->tf_r[regno]; 189 switch (width) { 190 case FTYPE_SNG: 191 dest->sng = int32_to_float32(tmp); 192 break; 193 case FTYPE_DBL: 194 dest->dbl = int32_to_float64(tmp); 195 break; 196 } 197 break; 198 case FTYPE_SNG: 199 tmp = regno == 0 ? 0 : frame->tf_r[regno]; 200 switch (width) { 201 case FTYPE_SNG: 202 dest->sng = tmp; 203 break; 204 case FTYPE_DBL: 205 dest->dbl = float32_to_float64(tmp); 206 break; 207 } 208 break; 209 case FTYPE_DBL: 210 tmp = regno == 0 ? 0 : frame->tf_r[regno]; 211 dest->dbl = ((float64)tmp) << 32; 212 tmp = regno == 31 ? 0 : frame->tf_r[regno + 1]; 213 dest->dbl |= (float64)tmp; 214 break; 215 } 216} 217 218/* 219 * Emulate an FPU instruction. On return, the trapframe registers 220 * will be modified to reflect the settings the hardware would have left. 221 */ 222int 223m88110_fpu_emulate(struct trapframe *frame, u_int32_t insn) 224{ 225 u_int rf, rd, rs1, rs2, t1, t2, td, tmax, opcode; 226 u_int32_t old_fpsr, old_fpcr; 227 int rc; 228 229 fparg arg1, arg2, dest; 230 231 /* 232 * Crack the instruction. 233 */ 234 rd = (insn >> 21) & 0x1f; 235 rs1 = (insn >> 16) & 0x1f; 236 rs2 = insn & 0x1f; 237 rf = (insn >> 15) & 0x01; 238 opcode = (insn >> 11) & 0x0f; 239 t1 = (insn >> 9) & 0x03; 240 t2 = (insn >> 7) & 0x03; 241 td = (insn >> 5) & 0x03; 242 243 /* 244 * Discard invalid opcodes, as well as instructions involving XRF, 245 * since we do not support them yet. 246 */ 247 if (rf != 0) 248 return (SIGILL); 249 250 switch (opcode) { 251 case 0x00: /* fmul */ 252 case 0x05: /* fadd */ 253 case 0x06: /* fsub */ 254 case 0x0e: /* fdiv */ 255 if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) || 256 (t2 != FTYPE_SNG && t2 != FTYPE_DBL) || 257 (td != FTYPE_SNG && td != FTYPE_DBL)) 258 return (SIGILL); 259 break; 260 case 0x04: /* flt */ 261 if (t1 != 0x00) /* flt on XRF */ 262 return (SIGILL); 263 if ((td != FTYPE_SNG && td != FTYPE_DBL) || 264 t2 != 0x00 || rs1 != 0) 265 return (SIGILL); 266 break; 267 case 0x07: /* fcmp, fcmpu */ 268 if ((t1 != FTYPE_SNG && t1 != FTYPE_DBL) || 269 (t2 != FTYPE_SNG && t2 != FTYPE_DBL)) 270 return (SIGILL); 271 if (td != 0x00 /* fcmp */ && td != 0x01 /* fcmpu */) 272 return (SIGILL); 273 break; 274 case 0x09: /* int */ 275 case 0x0a: /* nint */ 276 case 0x0b: /* trnc */ 277 if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) || 278 t1 != 0x00 || td != 0x00 || rs1 != 0) 279 return (SIGILL); 280 break; 281 case 0x01: /* fcvt */ 282 if (t2 == td) 283 return (SIGILL); 284 /* FALLTHROUGH */ 285 case 0x0f: /* fsqrt */ 286 if ((t2 != FTYPE_SNG && t2 != FTYPE_DBL) || 287 (td != FTYPE_SNG && td != FTYPE_DBL) || 288 t1 != 0x00 || rs1 != 0) 289 return (SIGILL); 290 break; 291 default: 292 case 0x08: /* mov */ 293 return (SIGILL); 294 } 295 296 /* 297 * Temporarily reset the status register, so that we can tell 298 * which exceptions are new after processing the opcode. 299 */ 300 old_fpsr = frame->tf_fpsr; 301 frame->tf_fpsr = 0; 302 303 /* 304 * Save fpcr as well, since we might need to change rounding mode 305 * temporarily. 306 */ 307 old_fpcr = frame->tf_fpcr; 308 309 /* 310 * The logic for instruction emulation is: 311 * 312 * - the computation precision is the largest one of all the operands. 313 * - all source operands are converted to this precision if needed. 314 * - computation is performed. 315 * - the result is stored into the destination operand, converting it 316 * to the destination precision if lower. 317 */ 318 319 switch (opcode) { 320 case 0x00: /* fmul */ 321 tmax = fpu_precision(t1, t2, td); 322 m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1); 323 m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2); 324 switch (tmax) { 325 case FTYPE_SNG: 326 dest.sng = float32_mul(arg1.sng, arg2.sng); 327 break; 328 case FTYPE_DBL: 329 dest.dbl = float64_mul(arg1.dbl, arg2.dbl); 330 break; 331 } 332 fpu_store(frame, rd, tmax, td, &dest); 333 break; 334 335 case 0x01: /* fcvt */ 336 tmax = fpu_precision(IGNORE_PRECISION, t2, td); 337 m88110_fpu_fetch(frame, rs2, t2, tmax, &dest); 338 fpu_store(frame, rd, tmax, td, &dest); 339 break; 340 341 case 0x04: /* flt */ 342 m88110_fpu_fetch(frame, rs2, FTYPE_INT, td, &dest); 343 fpu_store(frame, rd, td, td, &dest); 344 break; 345 346 case 0x05: /* fadd */ 347 tmax = fpu_precision(t1, t2, td); 348 m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1); 349 m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2); 350 switch (tmax) { 351 case FTYPE_SNG: 352 dest.sng = float32_add(arg1.sng, arg2.sng); 353 break; 354 case FTYPE_DBL: 355 dest.dbl = float64_add(arg1.dbl, arg2.dbl); 356 break; 357 } 358 fpu_store(frame, rd, tmax, td, &dest); 359 break; 360 361 case 0x06: /* fsub */ 362 tmax = fpu_precision(t1, t2, td); 363 m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1); 364 m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2); 365 switch (tmax) { 366 case FTYPE_SNG: 367 dest.sng = float32_sub(arg1.sng, arg2.sng); 368 break; 369 case FTYPE_DBL: 370 dest.dbl = float64_sub(arg1.dbl, arg2.dbl); 371 break; 372 } 373 fpu_store(frame, rd, tmax, td, &dest); 374 break; 375 376 case 0x07: /* fcmp, fcmpu */ 377 tmax = fpu_precision(t1, t2, IGNORE_PRECISION); 378 m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1); 379 m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2); 380 fpu_compare(frame, &arg1, &arg2, tmax, rd, td /* fcmpu */); 381 break; 382 383 case 0x09: /* int */ 384do_int: 385 m88110_fpu_fetch(frame, rs2, t2, t2, &dest); 386 fpu_store(frame, rd, t2, FTYPE_INT, &dest); 387 break; 388 389 case 0x0a: /* nint */ 390 /* round to nearest */ 391 frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) | 392 (FP_RN << FPCR_RD_SHIFT); 393 goto do_int; 394 395 case 0x0b: /* trnc */ 396 /* round towards zero */ 397 frame->tf_fpcr = (old_fpcr & ~(FPCR_RD_MASK << FPCR_RD_SHIFT)) | 398 (FP_RZ << FPCR_RD_SHIFT); 399 goto do_int; 400 401 case 0x0e: /* fdiv */ 402 tmax = fpu_precision(t1, t2, td); 403 m88110_fpu_fetch(frame, rs1, t1, tmax, &arg1); 404 m88110_fpu_fetch(frame, rs2, t2, tmax, &arg2); 405 switch (tmax) { 406 case FTYPE_SNG: 407 dest.sng = float32_div(arg1.sng, arg2.sng); 408 break; 409 case FTYPE_DBL: 410 dest.dbl = float64_div(arg1.dbl, arg2.dbl); 411 break; 412 } 413 fpu_store(frame, rd, tmax, td, &dest); 414 break; 415 416 case 0x0f: /* sqrt */ 417 tmax = fpu_precision(IGNORE_PRECISION, t2, td); 418 m88110_fpu_fetch(frame, rs2, t2, tmax, &arg1); 419 switch (tmax) { 420 case FTYPE_SNG: 421 dest.sng = float32_sqrt(arg1.sng); 422 break; 423 case FTYPE_DBL: 424 dest.dbl = float64_sqrt(arg1.dbl); 425 break; 426 } 427 fpu_store(frame, rd, tmax, td, &dest); 428 break; 429 } 430 431 /* 432 * Mark new exceptions, if any, in the fpsr, and decide whether 433 * to send a signal or not. 434 */ 435 436 if (frame->tf_fpsr & old_fpcr) 437 rc = SIGFPE; 438 else 439 rc = 0; 440 frame->tf_fpsr |= old_fpsr; 441 442 /* 443 * Restore fpcr as well. 444 */ 445 frame->tf_fpcr = old_fpcr; 446 447 return (rc); 448} 449