fpu.c revision 2802
141227Sjdp/*- 241227Sjdp * Copyright (c) 1990 William Jolitz. 341227Sjdp * Copyright (c) 1991 The Regents of the University of California. 441227Sjdp * All rights reserved. 541227Sjdp * 641227Sjdp * Redistribution and use in source and binary forms, with or without 741227Sjdp * modification, are permitted provided that the following conditions 841227Sjdp * are met: 941227Sjdp * 1. Redistributions of source code must retain the above copyright 1041227Sjdp * notice, this list of conditions and the following disclaimer. 1141227Sjdp * 2. Redistributions in binary form must reproduce the above copyright 1241227Sjdp * notice, this list of conditions and the following disclaimer in the 1341227Sjdp * documentation and/or other materials provided with the distribution. 1441227Sjdp * 3. All advertising materials mentioning features or use of this software 1541227Sjdp * must display the following acknowledgement: 1641227Sjdp * This product includes software developed by the University of 1741227Sjdp * California, Berkeley and its contributors. 1841227Sjdp * 4. Neither the name of the University nor the names of its contributors 1941227Sjdp * may be used to endorse or promote products derived from this software 2041227Sjdp * without specific prior written permission. 2141227Sjdp * 2241227Sjdp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2341227Sjdp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2441227Sjdp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2541227Sjdp * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2641227Sjdp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2776242Smarkm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2876242Smarkm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2976242Smarkm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3041227Sjdp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3142917Sjdp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3241227Sjdp * SUCH DAMAGE. 3341227Sjdp * 3441227Sjdp * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 3541227Sjdp * $Id: npx.c,v 1.11 1994/09/09 23:12:32 wollman Exp $ 3676242Smarkm */ 3776242Smarkm 3876242Smarkm#include "npx.h" 3976242Smarkm#if NNPX > 0 4076242Smarkm 4143056Sjdp#include <sys/param.h> 4241227Sjdp#include <sys/systm.h> 4341227Sjdp#include <sys/conf.h> 4441227Sjdp#include <sys/file.h> 4541227Sjdp#include <sys/proc.h> 4641227Sjdp#include <machine/cpu.h> 4743056Sjdp#include <machine/pcb.h> 4842917Sjdp#include <machine/trap.h> 4941227Sjdp#include <sys/ioctl.h> 5041227Sjdp#include <machine/specialreg.h> 5174870Sru#include <i386/isa/icu.h> 5241227Sjdp#include <i386/isa/isa_device.h> 5341227Sjdp#include <i386/isa/isa.h> 5441227Sjdp 5541227Sjdp/* 5674870Sru * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 5741227Sjdp */ 5841227Sjdp 5941227Sjdp#ifdef __GNUC__ 6041227Sjdp 6141227Sjdp#define fldcw(addr) __asm("fldcw %0" : : "m" (*addr)) 6241227Sjdp#define fnclex() __asm("fnclex") 6341227Sjdp#define fninit() __asm("fninit") 6441227Sjdp#define fnsave(addr) __asm("fnsave %0" : "=m" (*addr) : "0" (*addr)) 6541227Sjdp#define fnstcw(addr) __asm("fnstcw %0" : "=m" (*addr) : "0" (*addr)) 6641227Sjdp#define fnstsw(addr) __asm("fnstsw %0" : "=m" (*addr) : "0" (*addr)) 6742917Sjdp#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fwait") 6842917Sjdp#define frstor(addr) __asm("frstor %0" : : "m" (*addr)) 6946665Sjdp#define fwait() __asm("fwait") 7057841Skris#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ 7142917Sjdp : : "n" (CR0_TS) : "ax") 7242917Sjdp#define stop_emulating() __asm("clts") 7357841Skris 7451445Smarkm#else /* not __GNUC__ */ 7551445Smarkm 7659302Skrisvoid fldcw __P((caddr_t addr)); 7746665Sjdpvoid fnclex __P((void)); 7842917Sjdpvoid fninit __P((void)); 7942917Sjdpvoid fnsave __P((caddr_t addr)); 8057841Skrisvoid fnstcw __P((caddr_t addr)); 8157455Smarkmvoid fnstsw __P((caddr_t addr)); 8257455Smarkmvoid fp_divide_by_0 __P((void)); 8342917Sjdpvoid frstor __P((caddr_t addr)); 8442917Sjdpvoid fwait __P((void)); 8542917Sjdpvoid start_emulating __P((void)); 8642917Sjdpvoid stop_emulating __P((void)); 8743056Sjdp 8842917Sjdp#endif /* __GNUC__ */ 8943056Sjdp 9043056Sjdptypedef u_char bool_t; 9143056Sjdp 9243056Sjdpextern struct gate_descriptor idt[]; 9343056Sjdp 9442917Sjdpint npxdna __P((void)); 9542917Sjdpvoid npxexit __P((struct proc *p)); 9642917Sjdpvoid npxinit __P((u_int control)); 9742917Sjdpvoid npxintr __P((struct intrframe frame)); 9843056Sjdpvoid npxsave __P((struct save87 *addr)); 9942917Sjdpstatic int npxattach __P((struct isa_device *dvp)); 10042917Sjdpstatic int npxprobe __P((struct isa_device *dvp)); 10142917Sjdpstatic int npxprobe1 __P((struct isa_device *dvp)); 10242917Sjdp 10342917Sjdpstruct isa_driver npxdriver = { 10442917Sjdp npxprobe, npxattach, "npx", 10542917Sjdp}; 10642917Sjdp 10742917Sjdpu_int npx0_imask; 10842917Sjdpstruct proc *npxproc; 10942917Sjdp 11043056Sjdpstatic bool_t npx_ex16; 11142917Sjdpstatic bool_t npx_exists; 11242917Sjdpint hw_float; 11342917Sjdpstatic struct gate_descriptor npx_idt_probeintr; 11441227Sjdpstatic int npx_intrno; 11541227Sjdpstatic volatile u_int npx_intrs_while_probing; 11641227Sjdpstatic bool_t npx_irq13; 11741227Sjdpstatic volatile u_int npx_traps_while_probing; 11841227Sjdp 11941227Sjdp/* 12041227Sjdp * Special interrupt handlers. Someday intr0-intr15 will be used to count 12141227Sjdp * interrupts. We'll still need a special exception 16 handler. The busy 12241227Sjdp * latch stuff in probintr() can be moved to npxprobe(). 12341227Sjdp */ 12441227Sjdpvoid probeintr(void); 12541227Sjdpasm 126(" 127 .text 128_probeintr: 129 ss 130 incl _npx_intrs_while_probing 131 pushl %eax 132 movb $0x20,%al # EOI (asm in strings loses cpp features) 133 outb %al,$0xa0 # IO_ICU2 134 outb %al,$0x20 #IO_ICU1 135 movb $0,%al 136 outb %al,$0xf0 # clear BUSY# latch 137 popl %eax 138 iret 139"); 140 141void probetrap(void); 142asm 143(" 144 .text 145_probetrap: 146 ss 147 incl _npx_traps_while_probing 148 fnclex 149 iret 150"); 151 152/* 153 * Probe routine. Initialize cr0 to give correct behaviour for [f]wait 154 * whether the device exists or not (XXX should be elsewhere). Set flags 155 * to tell npxattach() what to do. Modify device struct if npx doesn't 156 * need to use interrupts. Return 1 if device exists. 157 */ 158static int 159npxprobe(dvp) 160 struct isa_device *dvp; 161{ 162 int result; 163 u_long save_eflags; 164 u_char save_icu1_mask; 165 u_char save_icu2_mask; 166 struct gate_descriptor save_idt_npxintr; 167 struct gate_descriptor save_idt_npxtrap; 168 /* 169 * This routine is now just a wrapper for npxprobe1(), to install 170 * special npx interrupt and trap handlers, to enable npx interrupts 171 * and to disable other interrupts. Someday isa_configure() will 172 * install suitable handlers and run with interrupts enabled so we 173 * won't need to do so much here. 174 */ 175 npx_intrno = NRSVIDT + ffs(dvp->id_irq) - 1; 176 save_eflags = read_eflags(); 177 disable_intr(); 178 save_icu1_mask = inb(IO_ICU1 + 1); 179 save_icu2_mask = inb(IO_ICU2 + 1); 180 save_idt_npxintr = idt[npx_intrno]; 181 save_idt_npxtrap = idt[16]; 182 outb(IO_ICU1 + 1, ~(IRQ_SLAVE | dvp->id_irq)); 183 outb(IO_ICU2 + 1, ~(dvp->id_irq >> 8)); 184 setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL); 185 setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL); 186 npx_idt_probeintr = idt[npx_intrno]; 187 enable_intr(); 188 result = npxprobe1(dvp); 189 disable_intr(); 190 outb(IO_ICU1 + 1, save_icu1_mask); 191 outb(IO_ICU2 + 1, save_icu2_mask); 192 idt[npx_intrno] = save_idt_npxintr; 193 idt[16] = save_idt_npxtrap; 194 write_eflags(save_eflags); 195 return (result); 196} 197 198static int 199npxprobe1(dvp) 200 struct isa_device *dvp; 201{ 202 int control; 203 int status; 204#ifdef lint 205 npxintr(); 206#endif 207 /* 208 * Partially reset the coprocessor, if any. Some BIOS's don't reset 209 * it after a warm boot. 210 */ 211 outb(0xf1, 0); /* full reset on some systems, NOP on others */ 212 outb(0xf0, 0); /* clear BUSY# latch */ 213 /* 214 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT 215 * instructions. We must set the CR0_MP bit and use the CR0_TS 216 * bit to control the trap, because setting the CR0_EM bit does 217 * not cause WAIT instructions to trap. It's important to trap 218 * WAIT instructions - otherwise the "wait" variants of no-wait 219 * control instructions would degenerate to the "no-wait" variants 220 * after FP context switches but work correctly otherwise. It's 221 * particularly important to trap WAITs when there is no NPX - 222 * otherwise the "wait" variants would always degenerate. 223 * 224 * Try setting CR0_NE to get correct error reporting on 486DX's. 225 * Setting it should fail or do nothing on lesser processors. 226 */ 227 load_cr0(rcr0() | CR0_MP | CR0_NE); 228 /* 229 * But don't trap while we're probing. 230 */ 231 stop_emulating(); 232 /* 233 * Finish resetting the coprocessor, if any. If there is an error 234 * pending, then we may get a bogus IRQ13, but probeintr() will handle 235 * it OK. Bogus halts have never been observed, but we enabled 236 * IRQ13 and cleared the BUSY# latch early to handle them anyway. 237 */ 238 fninit(); 239 DELAY(1000); /* wait for any IRQ13 (fwait might hang) */ 240#ifdef DIAGNOSTIC 241 if (npx_intrs_while_probing != 0) 242 printf("fninit caused %u bogus npx interrupt(s)\n", 243 npx_intrs_while_probing); 244 if (npx_traps_while_probing != 0) 245 printf("fninit caused %u bogus npx trap(s)\n", 246 npx_traps_while_probing); 247#endif 248 /* 249 * Check for a status of mostly zero. 250 */ 251 status = 0x5a5a; 252 fnstsw(&status); 253 if ((status & 0xb8ff) == 0) { 254 /* 255 * Good, now check for a proper control word. 256 */ 257 control = 0x5a5a; 258 fnstcw(&control); 259 if ((control & 0x1f3f) == 0x033f) { 260 hw_float = npx_exists = 1; 261 /* 262 * We have an npx, now divide by 0 to see if exception 263 * 16 works. 264 */ 265 control &= ~(1 << 2); /* enable divide by 0 trap */ 266 fldcw(&control); 267 npx_traps_while_probing = npx_intrs_while_probing = 0; 268 fp_divide_by_0(); 269 if (npx_traps_while_probing != 0) { 270 /* 271 * Good, exception 16 works. 272 */ 273 npx_ex16 = 1; 274 dvp->id_irq = 0; /* zap the interrupt */ 275 /* 276 * special return value to flag that we do not 277 * actually use any I/O registers 278 */ 279 return (-1); 280 } 281 if (npx_intrs_while_probing != 0) { 282 /* 283 * Bad, we are stuck with IRQ13. 284 */ 285 npx_irq13 = 1; 286 npx0_imask = dvp->id_irq; /* npxattach too late */ 287 return (IO_NPXSIZE); 288 } 289 /* 290 * Worse, even IRQ13 is broken. Use emulator. 291 */ 292 } 293 } 294 /* 295 * Probe failed, but we want to get to npxattach to initialize the 296 * emulator and say that it has been installed. XXX handle devices 297 * that aren't really devices better. 298 */ 299 dvp->id_irq = 0; 300 /* 301 * special return value to flag that we do not 302 * actually use any I/O registers 303 */ 304 return (-1); 305} 306 307/* 308 * Attach routine - announce which it is, and wire into system 309 */ 310int 311npxattach(dvp) 312 struct isa_device *dvp; 313{ 314 if (!npx_ex16 && !npx_irq13) { 315 if (npx_exists) { 316 printf("npx%d: Error reporting broken, using 387 emulator\n",dvp->id_unit); 317 hw_float = npx_exists = 0; 318 } else { 319 printf("npx%d: 387 Emulator\n",dvp->id_unit); 320 } 321 } 322 npxinit(__INITIAL_NPXCW__); 323 return (1); /* XXX unused */ 324} 325 326/* 327 * Initialize floating point unit. 328 */ 329void 330npxinit(control) 331 u_int control; 332{ 333 struct save87 dummy; 334 335 if (!npx_exists) 336 return; 337 /* 338 * fninit has the same h/w bugs as fnsave. Use the detoxified 339 * fnsave to throw away any junk in the fpu. fnsave initializes 340 * the fpu and sets npxproc = NULL as important side effects. 341 */ 342 npxsave(&dummy); 343 stop_emulating(); 344 fldcw(&control); 345 if (curpcb != NULL) 346 fnsave(&curpcb->pcb_savefpu); 347 start_emulating(); 348} 349 350/* 351 * Free coprocessor (if we have it). 352 */ 353void 354npxexit(p) 355 struct proc *p; 356{ 357 358 if (p == npxproc) { 359 start_emulating(); 360 npxproc = NULL; 361 } 362} 363 364/* 365 * Record the FPU state and reinitialize it all except for the control word. 366 * Then generate a SIGFPE. 367 * 368 * Reinitializing the state allows naive SIGFPE handlers to longjmp without 369 * doing any fixups. 370 * 371 * XXX there is currently no way to pass the full error state to signal 372 * handlers, and if this is a nested interrupt there is no way to pass even 373 * a status code! So there is no way to have a non-naive SIGFPE handler. At 374 * best a handler could do an fninit followed by an fldcw of a static value. 375 * fnclex would be of little use because it would leave junk on the FPU stack. 376 * Returning from the handler would be even less safe than usual because 377 * IRQ13 exception handling makes exceptions even less precise than usual. 378 */ 379void 380npxintr(frame) 381 struct intrframe frame; 382{ 383 int code; 384 385 if (npxproc == NULL || !npx_exists) { 386 /* XXX no %p in stand/printf.c. Cast to quiet gcc -Wall. */ 387 printf("npxintr: npxproc = %lx, curproc = %lx, npx_exists = %d\n", 388 (u_long) npxproc, (u_long) curproc, npx_exists); 389 panic("npxintr from nowhere"); 390 } 391 if (npxproc != curproc) { 392 printf("npxintr: npxproc = %lx, curproc = %lx, npx_exists = %d\n", 393 (u_long) npxproc, (u_long) curproc, npx_exists); 394 panic("npxintr from non-current process"); 395 } 396 /* 397 * Save state. This does an implied fninit. It had better not halt 398 * the cpu or we'll hang. 399 */ 400 outb(0xf0, 0); 401 fnsave(&curpcb->pcb_savefpu); 402 fwait(); 403 /* 404 * Restore control word (was clobbered by fnsave). 405 */ 406 fldcw(&curpcb->pcb_savefpu.sv_env.en_cw); 407 fwait(); 408 /* 409 * Remember the exception status word and tag word. The current 410 * (almost fninit'ed) fpu state is in the fpu and the exception 411 * state just saved will soon be junk. However, the implied fninit 412 * doesn't change the error pointers or register contents, and we 413 * preserved the control word and will copy the status and tag 414 * words, so the complete exception state can be recovered. 415 */ 416 curpcb->pcb_savefpu.sv_ex_sw = curpcb->pcb_savefpu.sv_env.en_sw; 417 curpcb->pcb_savefpu.sv_ex_tw = curpcb->pcb_savefpu.sv_env.en_tw; 418 419 /* 420 * Pass exception to process. 421 */ 422 if (ISPL(frame.if_cs) == SEL_UPL) { 423 /* 424 * Interrupt is essentially a trap, so we can afford to call 425 * the SIGFPE handler (if any) as soon as the interrupt 426 * returns. 427 * 428 * XXX little or nothing is gained from this, and plenty is 429 * lost - the interrupt frame has to contain the trap frame 430 * (this is otherwise only necessary for the rescheduling trap 431 * in doreti, and the frame for that could easily be set up 432 * just before it is used). 433 */ 434 curproc->p_md.md_regs = (int *)&frame.if_es; 435#ifdef notyet 436 /* 437 * Encode the appropriate code for detailed information on 438 * this exception. 439 */ 440 code = XXX_ENCODE(curpcb->pcb_savefpu.sv_ex_sw); 441#else 442 code = 0; /* XXX */ 443#endif 444 trapsignal(curproc, SIGFPE, code); 445 } else { 446 /* 447 * Nested interrupt. These losers occur when: 448 * o an IRQ13 is bogusly generated at a bogus time, e.g.: 449 * o immediately after an fnsave or frstor of an 450 * error state. 451 * o a couple of 386 instructions after 452 * "fstpl _memvar" causes a stack overflow. 453 * These are especially nasty when combined with a 454 * trace trap. 455 * o an IRQ13 occurs at the same time as another higher- 456 * priority interrupt. 457 * 458 * Treat them like a true async interrupt. 459 */ 460 psignal(npxproc, SIGFPE); 461 } 462} 463 464/* 465 * Implement device not available (DNA) exception 466 * 467 * It would be better to switch FP context here (only). This would require 468 * saving the state in the proc table instead of in the pcb. 469 */ 470int 471npxdna() 472{ 473 if (!npx_exists) 474 return (0); 475 if (npxproc != NULL) { 476 printf("npxdna: npxproc = %lx, curproc = %lx\n", 477 (u_long) npxproc, (u_long) curproc); 478 panic("npxdna"); 479 } 480 stop_emulating(); 481 /* 482 * Record new context early in case frstor causes an IRQ13. 483 */ 484 npxproc = curproc; 485 /* 486 * The following frstor may cause an IRQ13 when the state being 487 * restored has a pending error. The error will appear to have been 488 * triggered by the current (npx) user instruction even when that 489 * instruction is a no-wait instruction that should not trigger an 490 * error (e.g., fnclex). On at least one 486 system all of the 491 * no-wait instructions are broken the same as frstor, so our 492 * treatment does not amplify the breakage. On at least one 493 * 386/Cyrix 387 system, fnclex works correctly while frstor and 494 * fnsave are broken, so our treatment breaks fnclex if it is the 495 * first FPU instruction after a context switch. 496 */ 497 frstor(&curpcb->pcb_savefpu); 498 499 return (1); 500} 501 502/* 503 * Wrapper for fnsave instruction to handle h/w bugs. If there is an error 504 * pending, then fnsave generates a bogus IRQ13 on some systems. Force 505 * any IRQ13 to be handled immediately, and then ignore it. This routine is 506 * often called at splhigh so it must not use many system services. In 507 * particular, it's much easier to install a special handler than to 508 * guarantee that it's safe to use npxintr() and its supporting code. 509 */ 510void 511npxsave(addr) 512 struct save87 *addr; 513{ 514 u_char icu1_mask; 515 u_char icu2_mask; 516 u_char old_icu1_mask; 517 u_char old_icu2_mask; 518 struct gate_descriptor save_idt_npxintr; 519 520 disable_intr(); 521 old_icu1_mask = inb(IO_ICU1 + 1); 522 old_icu2_mask = inb(IO_ICU2 + 1); 523 save_idt_npxintr = idt[npx_intrno]; 524 outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); 525 outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); 526 idt[npx_intrno] = npx_idt_probeintr; 527 enable_intr(); 528 stop_emulating(); 529 fnsave(addr); 530 fwait(); 531 start_emulating(); 532 npxproc = NULL; 533 disable_intr(); 534 icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ 535 icu2_mask = inb(IO_ICU2 + 1); 536 outb(IO_ICU1 + 1, 537 (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask)); 538 outb(IO_ICU2 + 1, 539 (icu2_mask & ~(npx0_imask >> 8)) 540 | (old_icu2_mask & (npx0_imask >> 8))); 541 idt[npx_intrno] = save_idt_npxintr; 542 enable_intr(); /* back to usual state */ 543} 544 545#endif /* NNPX > 0 */ 546