/* npx.c revision 31255 */
1/*- 2 * Copyright (c) 1990 William Jolitz. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by the University of 17 * California, Berkeley and its contributors. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 35 * $Id: npx.c,v 1.53 1997/10/28 11:43:54 bde Exp $ 36 */ 37 38#include "npx.h" 39#if NNPX > 0 40 41#include "opt_math_emulate.h" 42 43#include <sys/param.h> 44#include <sys/systm.h> 45#include <sys/kernel.h> 46#include <sys/sysctl.h> 47#include <sys/conf.h> 48#include <sys/proc.h> 49#ifdef NPX_DEBUG 50#include <sys/syslog.h> 51#endif 52#include <sys/signalvar.h> 53 54#ifndef SMP 55#include <machine/asmacros.h> 56#endif 57#include <machine/cputypes.h> 58#include <machine/frame.h> 59#include <machine/ipl.h> 60#ifndef SMP 61#include <machine/md_var.h> 62#endif 63#include <machine/pcb.h> 64#include <machine/psl.h> 65#ifndef SMP 66#include <machine/clock.h> 67#endif 68#include <machine/specialreg.h> 69#include <machine/segments.h> 70 71#ifndef SMP 72#include <i386/isa/icu.h> 73#include <i386/isa/intr_machdep.h> 74#include <i386/isa/isa.h> 75#endif 76#include <i386/isa/isa_device.h> 77 78/* 79 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 80 */ 81 82/* Configuration flags. */ 83#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) 84#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) 85#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) 86 87/* XXX - should be in header file. 
*/ 88extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); 89extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); 90extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, size_t len)); 91extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, size_t len)); 92 93void i586_bcopy __P((const void *from, void *to, size_t len)); 94void i586_bzero __P((void *buf, size_t len)); 95int i586_copyin __P((const void *udaddr, void *kaddr, size_t len)); 96int i586_copyout __P((const void *kaddr, void *udaddr, size_t len)); 97 98#ifdef __GNUC__ 99 100#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 101#define fnclex() __asm("fnclex") 102#define fninit() __asm("fninit") 103#define fnop() __asm("fnop") 104#define fnsave(addr) __asm("fnsave %0" : "=m" (*(addr))) 105#define fnstcw(addr) __asm("fnstcw %0" : "=m" (*(addr))) 106#define fnstsw(addr) __asm("fnstsw %0" : "=m" (*(addr))) 107#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") 108#define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) 109#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ 110 : : "n" (CR0_TS) : "ax") 111#define stop_emulating() __asm("clts") 112 113#else /* not __GNUC__ */ 114 115void fldcw __P((caddr_t addr)); 116void fnclex __P((void)); 117void fninit __P((void)); 118void fnop __P((void)); 119void fnsave __P((caddr_t addr)); 120void fnstcw __P((caddr_t addr)); 121void fnstsw __P((caddr_t addr)); 122void fp_divide_by_0 __P((void)); 123void frstor __P((caddr_t addr)); 124void start_emulating __P((void)); 125void stop_emulating __P((void)); 126 127#endif /* __GNUC__ */ 128 129typedef u_char bool_t; 130 131static int npxattach __P((struct isa_device *dvp)); 132static int npxprobe __P((struct isa_device *dvp)); 133static int npxprobe1 __P((struct isa_device *dvp)); 134 135struct isa_driver npxdriver = { 136 npxprobe, npxattach, "npx", 137}; 138 139int hw_float; /* XXX currently just alias for npx_exists 
*/ 140 141SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, 142 CTLFLAG_RD, &hw_float, 0, 143 "Floatingpoint instructions executed in hardware"); 144 145static u_int npx0_imask = SWI_CLOCK_MASK; 146 147#ifndef SMP /* XXX per-cpu on smp */ 148struct proc *npxproc; 149#endif 150 151static bool_t npx_ex16; 152static bool_t npx_exists; 153static struct gate_descriptor npx_idt_probeintr; 154static int npx_intrno; 155static volatile u_int npx_intrs_while_probing; 156static bool_t npx_irq13; 157static volatile u_int npx_traps_while_probing; 158 159#ifndef SMP 160/* 161 * Special interrupt handlers. Someday intr0-intr15 will be used to count 162 * interrupts. We'll still need a special exception 16 handler. The busy 163 * latch stuff in probeintr() can be moved to npxprobe(). 164 */ 165inthand_t probeintr; 166 167asm 168(" 169 .text 170 .p2align 2,0x90 171" __XSTRING(CNAME(probeintr)) ": 172 ss 173 incl " __XSTRING(CNAME(npx_intrs_while_probing)) " 174 pushl %eax 175 movb $0x20,%al # EOI (asm in strings loses cpp features) 176 outb %al,$0xa0 # IO_ICU2 177 outb %al,$0x20 # IO_ICU1 178 movb $0,%al 179 outb %al,$0xf0 # clear BUSY# latch 180 popl %eax 181 iret 182"); 183 184inthand_t probetrap; 185asm 186(" 187 .text 188 .p2align 2,0x90 189" __XSTRING(CNAME(probetrap)) ": 190 ss 191 incl " __XSTRING(CNAME(npx_traps_while_probing)) " 192 fnclex 193 iret 194"); 195#endif /* SMP */ 196 197 198/* 199 * Probe routine. Initialize cr0 to give correct behaviour for [f]wait 200 * whether the device exists or not (XXX should be elsewhere). Set flags 201 * to tell npxattach() what to do. Modify device struct if npx doesn't 202 * need to use interrupts. Return 1 if device exists. 
203 */ 204static int 205npxprobe(dvp) 206 struct isa_device *dvp; 207{ 208#ifdef SMP 209 210 return npxprobe1(dvp); 211 212#else /* SMP */ 213 214 int result; 215 u_long save_eflags; 216 u_char save_icu1_mask; 217 u_char save_icu2_mask; 218 struct gate_descriptor save_idt_npxintr; 219 struct gate_descriptor save_idt_npxtrap; 220 /* 221 * This routine is now just a wrapper for npxprobe1(), to install 222 * special npx interrupt and trap handlers, to enable npx interrupts 223 * and to disable other interrupts. Someday isa_configure() will 224 * install suitable handlers and run with interrupts enabled so we 225 * won't need to do so much here. 226 */ 227 npx_intrno = NRSVIDT + ffs(dvp->id_irq) - 1; 228 save_eflags = read_eflags(); 229 disable_intr(); 230 save_icu1_mask = inb(IO_ICU1 + 1); 231 save_icu2_mask = inb(IO_ICU2 + 1); 232 save_idt_npxintr = idt[npx_intrno]; 233 save_idt_npxtrap = idt[16]; 234 outb(IO_ICU1 + 1, ~(IRQ_SLAVE | dvp->id_irq)); 235 outb(IO_ICU2 + 1, ~(dvp->id_irq >> 8)); 236 setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 237 setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 238 npx_idt_probeintr = idt[npx_intrno]; 239 enable_intr(); 240 result = npxprobe1(dvp); 241 disable_intr(); 242 outb(IO_ICU1 + 1, save_icu1_mask); 243 outb(IO_ICU2 + 1, save_icu2_mask); 244 idt[npx_intrno] = save_idt_npxintr; 245 idt[16] = save_idt_npxtrap; 246 write_eflags(save_eflags); 247 return (result); 248 249#endif /* SMP */ 250} 251 252static int 253npxprobe1(dvp) 254 struct isa_device *dvp; 255{ 256 u_short control; 257 u_short status; 258 259 /* 260 * Partially reset the coprocessor, if any. Some BIOS's don't reset 261 * it after a warm boot. 262 */ 263 outb(0xf1, 0); /* full reset on some systems, NOP on others */ 264 outb(0xf0, 0); /* clear BUSY# latch */ 265 /* 266 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT 267 * instructions. 
We must set the CR0_MP bit and use the CR0_TS 268 * bit to control the trap, because setting the CR0_EM bit does 269 * not cause WAIT instructions to trap. It's important to trap 270 * WAIT instructions - otherwise the "wait" variants of no-wait 271 * control instructions would degenerate to the "no-wait" variants 272 * after FP context switches but work correctly otherwise. It's 273 * particularly important to trap WAITs when there is no NPX - 274 * otherwise the "wait" variants would always degenerate. 275 * 276 * Try setting CR0_NE to get correct error reporting on 486DX's. 277 * Setting it should fail or do nothing on lesser processors. 278 */ 279 load_cr0(rcr0() | CR0_MP | CR0_NE); 280 /* 281 * But don't trap while we're probing. 282 */ 283 stop_emulating(); 284 /* 285 * Finish resetting the coprocessor, if any. If there is an error 286 * pending, then we may get a bogus IRQ13, but probeintr() will handle 287 * it OK. Bogus halts have never been observed, but we enabled 288 * IRQ13 and cleared the BUSY# latch early to handle them anyway. 289 */ 290 fninit(); 291 292#ifdef SMP 293 294 /* 295 * Exception 16 MUST work for SMP. 296 */ 297 npx_irq13 = 0; 298 npx_ex16 = hw_float = npx_exists = 1; 299 dvp->id_irq = 0; /* zap the interrupt */ 300 /* 301 * special return value to flag that we do not 302 * actually use any I/O registers 303 */ 304 return (-1); 305 306#else /* SMP */ 307 308 /* 309 * Don't use fwait here because it might hang. 310 * Don't use fnop here because it usually hangs if there is no FPU. 311 */ 312 DELAY(1000); /* wait for any IRQ13 */ 313#ifdef DIAGNOSTIC 314 if (npx_intrs_while_probing != 0) 315 printf("fninit caused %u bogus npx interrupt(s)\n", 316 npx_intrs_while_probing); 317 if (npx_traps_while_probing != 0) 318 printf("fninit caused %u bogus npx trap(s)\n", 319 npx_traps_while_probing); 320#endif 321 /* 322 * Check for a status of mostly zero. 
323 */ 324 status = 0x5a5a; 325 fnstsw(&status); 326 if ((status & 0xb8ff) == 0) { 327 /* 328 * Good, now check for a proper control word. 329 */ 330 control = 0x5a5a; 331 fnstcw(&control); 332 if ((control & 0x1f3f) == 0x033f) { 333 hw_float = npx_exists = 1; 334 /* 335 * We have an npx, now divide by 0 to see if exception 336 * 16 works. 337 */ 338 control &= ~(1 << 2); /* enable divide by 0 trap */ 339 fldcw(&control); 340 npx_traps_while_probing = npx_intrs_while_probing = 0; 341 fp_divide_by_0(); 342 if (npx_traps_while_probing != 0) { 343 /* 344 * Good, exception 16 works. 345 */ 346 npx_ex16 = 1; 347 dvp->id_irq = 0; /* zap the interrupt */ 348 /* 349 * special return value to flag that we do not 350 * actually use any I/O registers 351 */ 352 return (-1); 353 } 354 if (npx_intrs_while_probing != 0) { 355 /* 356 * Bad, we are stuck with IRQ13. 357 */ 358 npx_irq13 = 1; 359 /* 360 * npxattach would be too late to set npx0_imask. 361 */ 362 npx0_imask |= dvp->id_irq; 363 return (IO_NPXSIZE); 364 } 365 /* 366 * Worse, even IRQ13 is broken. Use emulator. 367 */ 368 } 369 } 370 /* 371 * Probe failed, but we want to get to npxattach to initialize the 372 * emulator and say that it has been installed. XXX handle devices 373 * that aren't really devices better. 374 */ 375 dvp->id_irq = 0; 376 /* 377 * special return value to flag that we do not 378 * actually use any I/O registers 379 */ 380 return (-1); 381 382#endif /* SMP */ 383} 384 385/* 386 * Attach routine - announce which it is, and wire into system 387 */ 388int 389npxattach(dvp) 390 struct isa_device *dvp; 391{ 392 /* The caller has printed "irq 13" for the npx_irq13 case. 
*/ 393 if (!npx_irq13) { 394 printf("npx%d: ", dvp->id_unit); 395 if (npx_ex16) 396 printf("INT 16 interface\n"); 397#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) 398 else if (npx_exists) { 399 printf("error reporting broken; using 387 emulator\n"); 400 hw_float = npx_exists = 0; 401 } else 402 printf("387 emulator\n"); 403#else 404 else 405 printf("no 387 emulator in kernel!\n"); 406#endif 407 } 408 npxinit(__INITIAL_NPXCW__); 409 410#if defined(I586_CPU) 411 if (cpu_class == CPUCLASS_586 && npx_ex16) { 412 if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) { 413 bcopy_vector = i586_bcopy; 414 ovbcopy_vector = i586_bcopy; 415 } 416 if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) 417 bzero = i586_bzero; 418 if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { 419 copyin_vector = i586_copyin; 420 copyout_vector = i586_copyout; 421 } 422 } 423#endif 424 425 return (1); /* XXX unused */ 426} 427 428/* 429 * Initialize floating point unit. 430 */ 431void 432npxinit(control) 433 u_short control; 434{ 435 struct save87 dummy; 436 437 if (!npx_exists) 438 return; 439 /* 440 * fninit has the same h/w bugs as fnsave. Use the detoxified 441 * fnsave to throw away any junk in the fpu. npxsave() initializes 442 * the fpu and sets npxproc = NULL as important side effects. 443 */ 444 npxsave(&dummy); 445 stop_emulating(); 446 fldcw(&control); 447 if (curpcb != NULL) 448 fnsave(&curpcb->pcb_savefpu); 449 start_emulating(); 450} 451 452/* 453 * Free coprocessor (if we have it). 454 */ 455void 456npxexit(p) 457 struct proc *p; 458{ 459 460 if (p == npxproc) 461 npxsave(&curpcb->pcb_savefpu); 462#ifdef NPX_DEBUG 463 if (npx_exists) { 464 u_int masked_exceptions; 465 466 masked_exceptions = curpcb->pcb_savefpu.sv_env.en_cw 467 & curpcb->pcb_savefpu.sv_env.en_sw & 0x7f; 468 /* 469 * Log exceptions that would have trapped with the old 470 * control word (overflow, divide by 0, and invalid operand). 
471 */ 472 if (masked_exceptions & 0x0d) 473 log(LOG_ERR, 474 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", 475 p->p_pid, p->p_comm, masked_exceptions); 476 } 477#endif 478} 479 480/* 481 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 482 * 483 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now 484 * depend on longjmp() restoring a usable state. Restoring the state 485 * or examining it might fail if we didn't clear exceptions. 486 * 487 * XXX there is no standard way to tell SIGFPE handlers about the error 488 * state. The old interface: 489 * 490 * void handler(int sig, int code, struct sigcontext *scp); 491 * 492 * is broken because it is non-ANSI and because the FP state is not in 493 * struct sigcontext. 494 * 495 * XXX the FP state is not preserved across signal handlers. So signal 496 * handlers cannot afford to do FP unless they preserve the state or 497 * longjmp() out. Both preserving the state and longjmp()ing may be 498 * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable 499 * solution for signals other than SIGFPE. 500 */ 501void 502npxintr(unit) 503 int unit; 504{ 505 int code; 506 struct intrframe *frame; 507 508 if (npxproc == NULL || !npx_exists) { 509 printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", 510 npxproc, curproc, npx_exists); 511 panic("npxintr from nowhere"); 512 } 513 if (npxproc != curproc) { 514 printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", 515 npxproc, curproc, npx_exists); 516 panic("npxintr from non-current process"); 517 } 518 519 outb(0xf0, 0); 520 fnstsw(&curpcb->pcb_savefpu.sv_ex_sw); 521 fnclex(); 522 fnop(); 523 524 /* 525 * Pass exception to process. 
526 */ 527 frame = (struct intrframe *)&unit; /* XXX */ 528 if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) { 529 /* 530 * Interrupt is essentially a trap, so we can afford to call 531 * the SIGFPE handler (if any) as soon as the interrupt 532 * returns. 533 * 534 * XXX little or nothing is gained from this, and plenty is 535 * lost - the interrupt frame has to contain the trap frame 536 * (this is otherwise only necessary for the rescheduling trap 537 * in doreti, and the frame for that could easily be set up 538 * just before it is used). 539 */ 540 curproc->p_md.md_regs = (struct trapframe *)&frame->if_es; 541#ifdef notyet 542 /* 543 * Encode the appropriate code for detailed information on 544 * this exception. 545 */ 546 code = XXX_ENCODE(curpcb->pcb_savefpu.sv_ex_sw); 547#else 548 code = 0; /* XXX */ 549#endif 550 trapsignal(curproc, SIGFPE, code); 551 } else { 552 /* 553 * Nested interrupt. These losers occur when: 554 * o an IRQ13 is bogusly generated at a bogus time, e.g.: 555 * o immediately after an fnsave or frstor of an 556 * error state. 557 * o a couple of 386 instructions after 558 * "fstpl _memvar" causes a stack overflow. 559 * These are especially nasty when combined with a 560 * trace trap. 561 * o an IRQ13 occurs at the same time as another higher- 562 * priority interrupt. 563 * 564 * Treat them like a true async interrupt. 565 */ 566 psignal(curproc, SIGFPE); 567 } 568} 569 570/* 571 * Implement device not available (DNA) exception 572 * 573 * It would be better to switch FP context here (if curproc != npxproc) 574 * and not necessarily for every context switch, but it is too hard to 575 * access foreign pcb's. 576 */ 577int 578npxdna() 579{ 580 if (!npx_exists) 581 return (0); 582 if (npxproc != NULL) { 583 printf("npxdna: npxproc = %p, curproc = %p\n", 584 npxproc, curproc); 585 panic("npxdna"); 586 } 587 stop_emulating(); 588 /* 589 * Record new context early in case frstor causes an IRQ13. 
590 */ 591 npxproc = curproc; 592 curpcb->pcb_savefpu.sv_ex_sw = 0; 593 /* 594 * The following frstor may cause an IRQ13 when the state being 595 * restored has a pending error. The error will appear to have been 596 * triggered by the current (npx) user instruction even when that 597 * instruction is a no-wait instruction that should not trigger an 598 * error (e.g., fnclex). On at least one 486 system all of the 599 * no-wait instructions are broken the same as frstor, so our 600 * treatment does not amplify the breakage. On at least one 601 * 386/Cyrix 387 system, fnclex works correctly while frstor and 602 * fnsave are broken, so our treatment breaks fnclex if it is the 603 * first FPU instruction after a context switch. 604 */ 605 frstor(&curpcb->pcb_savefpu); 606 607 return (1); 608} 609 610/* 611 * Wrapper for fnsave instruction to handle h/w bugs. If there is an error 612 * pending, then fnsave generates a bogus IRQ13 on some systems. Force 613 * any IRQ13 to be handled immediately, and then ignore it. This routine is 614 * often called at splhigh so it must not use many system services. In 615 * particular, it's much easier to install a special handler than to 616 * guarantee that it's safe to use npxintr() and its supporting code. 
617 */ 618void 619npxsave(addr) 620 struct save87 *addr; 621{ 622#ifdef SMP 623 624 stop_emulating(); 625 fnsave(addr); 626 /* fnop(); */ 627 start_emulating(); 628 npxproc = NULL; 629 630#else /* SMP */ 631 632 u_char icu1_mask; 633 u_char icu2_mask; 634 u_char old_icu1_mask; 635 u_char old_icu2_mask; 636 struct gate_descriptor save_idt_npxintr; 637 638 disable_intr(); 639 old_icu1_mask = inb(IO_ICU1 + 1); 640 old_icu2_mask = inb(IO_ICU2 + 1); 641 save_idt_npxintr = idt[npx_intrno]; 642 outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); 643 outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); 644 idt[npx_intrno] = npx_idt_probeintr; 645 enable_intr(); 646 stop_emulating(); 647 fnsave(addr); 648 fnop(); 649 start_emulating(); 650 npxproc = NULL; 651 disable_intr(); 652 icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ 653 icu2_mask = inb(IO_ICU2 + 1); 654 outb(IO_ICU1 + 1, 655 (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask)); 656 outb(IO_ICU2 + 1, 657 (icu2_mask & ~(npx0_imask >> 8)) 658 | (old_icu2_mask & (npx0_imask >> 8))); 659 idt[npx_intrno] = save_idt_npxintr; 660 enable_intr(); /* back to usual state */ 661 662#endif /* SMP */ 663} 664 665#endif /* NNPX > 0 */ 666