/* npx.c revision 30805 */
1/*- 2 * Copyright (c) 1990 William Jolitz. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. All advertising materials mentioning features or use of this software 15 * must display the following acknowledgement: 16 * This product includes software developed by the University of 17 * California, Berkeley and its contributors. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 35 * $Id: npx.c,v 1.52 1997/08/21 06:32:58 charnier Exp $ 36 */ 37 38#include "npx.h" 39#if NNPX > 0 40 41#include "opt_cpu.h" 42#include "opt_math_emulate.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/kernel.h> 47#include <sys/sysctl.h> 48#include <sys/conf.h> 49#include <sys/proc.h> 50#ifdef NPX_DEBUG 51#include <sys/syslog.h> 52#endif 53#include <sys/signalvar.h> 54 55#include <machine/asmacros.h> 56#include <machine/cputypes.h> 57#include <machine/frame.h> 58#include <machine/ipl.h> 59#include <machine/md_var.h> 60#include <machine/pcb.h> 61#include <machine/psl.h> 62#include <machine/clock.h> 63#include <machine/specialreg.h> 64#include <machine/segments.h> 65 66#include <i386/isa/icu.h> 67#include <i386/isa/isa_device.h> 68#include <i386/isa/intr_machdep.h> 69#include <i386/isa/isa.h> 70 71/* 72 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 73 */ 74 75/* Configuration flags. */ 76#define NPX_DISABLE_I586_OPTIMIZED_BCOPY (1 << 0) 77#define NPX_DISABLE_I586_OPTIMIZED_BZERO (1 << 1) 78#define NPX_DISABLE_I586_OPTIMIZED_COPYIO (1 << 2) 79 80/* XXX - should be in header file. 
*/ 81extern void (*bcopy_vector) __P((const void *from, void *to, size_t len)); 82extern void (*ovbcopy_vector) __P((const void *from, void *to, size_t len)); 83extern int (*copyin_vector) __P((const void *udaddr, void *kaddr, size_t len)); 84extern int (*copyout_vector) __P((const void *kaddr, void *udaddr, size_t len)); 85 86void i586_bcopy __P((const void *from, void *to, size_t len)); 87void i586_bzero __P((void *buf, size_t len)); 88int i586_copyin __P((const void *udaddr, void *kaddr, size_t len)); 89int i586_copyout __P((const void *kaddr, void *udaddr, size_t len)); 90 91#ifdef __GNUC__ 92 93#define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 94#define fnclex() __asm("fnclex") 95#define fninit() __asm("fninit") 96#define fnop() __asm("fnop") 97#define fnsave(addr) __asm("fnsave %0" : "=m" (*(addr))) 98#define fnstcw(addr) __asm("fnstcw %0" : "=m" (*(addr))) 99#define fnstsw(addr) __asm("fnstsw %0" : "=m" (*(addr))) 100#define fp_divide_by_0() __asm("fldz; fld1; fdiv %st,%st(1); fnop") 101#define frstor(addr) __asm("frstor %0" : : "m" (*(addr))) 102#define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ 103 : : "n" (CR0_TS) : "ax") 104#define stop_emulating() __asm("clts") 105 106#else /* not __GNUC__ */ 107 108void fldcw __P((caddr_t addr)); 109void fnclex __P((void)); 110void fninit __P((void)); 111void fnop __P((void)); 112void fnsave __P((caddr_t addr)); 113void fnstcw __P((caddr_t addr)); 114void fnstsw __P((caddr_t addr)); 115void fp_divide_by_0 __P((void)); 116void frstor __P((caddr_t addr)); 117void start_emulating __P((void)); 118void stop_emulating __P((void)); 119 120#endif /* __GNUC__ */ 121 122typedef u_char bool_t; 123 124static int npxattach __P((struct isa_device *dvp)); 125static int npxprobe __P((struct isa_device *dvp)); 126static int npxprobe1 __P((struct isa_device *dvp)); 127 128struct isa_driver npxdriver = { 129 npxprobe, npxattach, "npx", 130}; 131 132int hw_float; /* XXX currently just alias for npx_exists */ 133 
134SYSCTL_INT(_hw,HW_FLOATINGPT, floatingpoint, 135 CTLFLAG_RD, &hw_float, 0, 136 "Floatingpoint instructions executed in hardware"); 137 138static u_int npx0_imask = SWI_CLOCK_MASK; 139 140#ifndef SMP /* XXX per-cpu on smp */ 141struct proc *npxproc; 142#endif 143 144static bool_t npx_ex16; 145static bool_t npx_exists; 146static struct gate_descriptor npx_idt_probeintr; 147static int npx_intrno; 148static volatile u_int npx_intrs_while_probing; 149static bool_t npx_irq13; 150static volatile u_int npx_traps_while_probing; 151 152#ifndef SMP 153/* 154 * Special interrupt handlers. Someday intr0-intr15 will be used to count 155 * interrupts. We'll still need a special exception 16 handler. The busy 156 * latch stuff in probeintr() can be moved to npxprobe(). 157 */ 158inthand_t probeintr; 159 160asm 161(" 162 .text 163 .p2align 2,0x90 164" __XSTRING(CNAME(probeintr)) ": 165 ss 166 incl " __XSTRING(CNAME(npx_intrs_while_probing)) " 167 pushl %eax 168 movb $0x20,%al # EOI (asm in strings loses cpp features) 169 outb %al,$0xa0 # IO_ICU2 170 outb %al,$0x20 # IO_ICU1 171 movb $0,%al 172 outb %al,$0xf0 # clear BUSY# latch 173 popl %eax 174 iret 175"); 176 177inthand_t probetrap; 178asm 179(" 180 .text 181 .p2align 2,0x90 182" __XSTRING(CNAME(probetrap)) ": 183 ss 184 incl " __XSTRING(CNAME(npx_traps_while_probing)) " 185 fnclex 186 iret 187"); 188#endif /* SMP */ 189 190 191/* 192 * Probe routine. Initialize cr0 to give correct behaviour for [f]wait 193 * whether the device exists or not (XXX should be elsewhere). Set flags 194 * to tell npxattach() what to do. Modify device struct if npx doesn't 195 * need to use interrupts. Return 1 if device exists. 
196 */ 197static int 198npxprobe(dvp) 199 struct isa_device *dvp; 200{ 201#ifdef SMP 202 203 return npxprobe1(dvp); 204 205#else /* SMP */ 206 207 int result; 208 u_long save_eflags; 209 u_char save_icu1_mask; 210 u_char save_icu2_mask; 211 struct gate_descriptor save_idt_npxintr; 212 struct gate_descriptor save_idt_npxtrap; 213 /* 214 * This routine is now just a wrapper for npxprobe1(), to install 215 * special npx interrupt and trap handlers, to enable npx interrupts 216 * and to disable other interrupts. Someday isa_configure() will 217 * install suitable handlers and run with interrupts enabled so we 218 * won't need to do so much here. 219 */ 220 npx_intrno = NRSVIDT + ffs(dvp->id_irq) - 1; 221 save_eflags = read_eflags(); 222 disable_intr(); 223 save_icu1_mask = inb(IO_ICU1 + 1); 224 save_icu2_mask = inb(IO_ICU2 + 1); 225 save_idt_npxintr = idt[npx_intrno]; 226 save_idt_npxtrap = idt[16]; 227 outb(IO_ICU1 + 1, ~(IRQ_SLAVE | dvp->id_irq)); 228 outb(IO_ICU2 + 1, ~(dvp->id_irq >> 8)); 229 setidt(16, probetrap, SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 230 setidt(npx_intrno, probeintr, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); 231 npx_idt_probeintr = idt[npx_intrno]; 232 enable_intr(); 233 result = npxprobe1(dvp); 234 disable_intr(); 235 outb(IO_ICU1 + 1, save_icu1_mask); 236 outb(IO_ICU2 + 1, save_icu2_mask); 237 idt[npx_intrno] = save_idt_npxintr; 238 idt[16] = save_idt_npxtrap; 239 write_eflags(save_eflags); 240 return (result); 241 242#endif /* SMP */ 243} 244 245static int 246npxprobe1(dvp) 247 struct isa_device *dvp; 248{ 249 u_short control; 250 u_short status; 251 252 /* 253 * Partially reset the coprocessor, if any. Some BIOS's don't reset 254 * it after a warm boot. 255 */ 256 outb(0xf1, 0); /* full reset on some systems, NOP on others */ 257 outb(0xf0, 0); /* clear BUSY# latch */ 258 /* 259 * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT 260 * instructions. 
We must set the CR0_MP bit and use the CR0_TS 261 * bit to control the trap, because setting the CR0_EM bit does 262 * not cause WAIT instructions to trap. It's important to trap 263 * WAIT instructions - otherwise the "wait" variants of no-wait 264 * control instructions would degenerate to the "no-wait" variants 265 * after FP context switches but work correctly otherwise. It's 266 * particularly important to trap WAITs when there is no NPX - 267 * otherwise the "wait" variants would always degenerate. 268 * 269 * Try setting CR0_NE to get correct error reporting on 486DX's. 270 * Setting it should fail or do nothing on lesser processors. 271 */ 272 load_cr0(rcr0() | CR0_MP | CR0_NE); 273 /* 274 * But don't trap while we're probing. 275 */ 276 stop_emulating(); 277 /* 278 * Finish resetting the coprocessor, if any. If there is an error 279 * pending, then we may get a bogus IRQ13, but probeintr() will handle 280 * it OK. Bogus halts have never been observed, but we enabled 281 * IRQ13 and cleared the BUSY# latch early to handle them anyway. 282 */ 283 fninit(); 284 285#ifdef SMP 286 287 /* 288 * Exception 16 MUST work for SMP. 289 */ 290 npx_irq13 = 0; 291 npx_ex16 = hw_float = npx_exists = 1; 292 dvp->id_irq = 0; /* zap the interrupt */ 293 /* 294 * special return value to flag that we do not 295 * actually use any I/O registers 296 */ 297 return (-1); 298 299#else /* SMP */ 300 301 /* 302 * Don't use fwait here because it might hang. 303 * Don't use fnop here because it usually hangs if there is no FPU. 304 */ 305 DELAY(1000); /* wait for any IRQ13 */ 306#ifdef DIAGNOSTIC 307 if (npx_intrs_while_probing != 0) 308 printf("fninit caused %u bogus npx interrupt(s)\n", 309 npx_intrs_while_probing); 310 if (npx_traps_while_probing != 0) 311 printf("fninit caused %u bogus npx trap(s)\n", 312 npx_traps_while_probing); 313#endif 314 /* 315 * Check for a status of mostly zero. 
316 */ 317 status = 0x5a5a; 318 fnstsw(&status); 319 if ((status & 0xb8ff) == 0) { 320 /* 321 * Good, now check for a proper control word. 322 */ 323 control = 0x5a5a; 324 fnstcw(&control); 325 if ((control & 0x1f3f) == 0x033f) { 326 hw_float = npx_exists = 1; 327 /* 328 * We have an npx, now divide by 0 to see if exception 329 * 16 works. 330 */ 331 control &= ~(1 << 2); /* enable divide by 0 trap */ 332 fldcw(&control); 333 npx_traps_while_probing = npx_intrs_while_probing = 0; 334 fp_divide_by_0(); 335 if (npx_traps_while_probing != 0) { 336 /* 337 * Good, exception 16 works. 338 */ 339 npx_ex16 = 1; 340 dvp->id_irq = 0; /* zap the interrupt */ 341 /* 342 * special return value to flag that we do not 343 * actually use any I/O registers 344 */ 345 return (-1); 346 } 347 if (npx_intrs_while_probing != 0) { 348 /* 349 * Bad, we are stuck with IRQ13. 350 */ 351 npx_irq13 = 1; 352 /* 353 * npxattach would be too late to set npx0_imask. 354 */ 355 npx0_imask |= dvp->id_irq; 356 return (IO_NPXSIZE); 357 } 358 /* 359 * Worse, even IRQ13 is broken. Use emulator. 360 */ 361 } 362 } 363 /* 364 * Probe failed, but we want to get to npxattach to initialize the 365 * emulator and say that it has been installed. XXX handle devices 366 * that aren't really devices better. 367 */ 368 dvp->id_irq = 0; 369 /* 370 * special return value to flag that we do not 371 * actually use any I/O registers 372 */ 373 return (-1); 374 375#endif /* SMP */ 376} 377 378/* 379 * Attach routine - announce which it is, and wire into system 380 */ 381int 382npxattach(dvp) 383 struct isa_device *dvp; 384{ 385 /* The caller has printed "irq 13" for the npx_irq13 case. 
*/ 386 if (!npx_irq13) { 387 printf("npx%d: ", dvp->id_unit); 388 if (npx_ex16) 389 printf("INT 16 interface\n"); 390#if defined(MATH_EMULATE) || defined(GPL_MATH_EMULATE) 391 else if (npx_exists) { 392 printf("error reporting broken; using 387 emulator\n"); 393 hw_float = npx_exists = 0; 394 } else 395 printf("387 emulator\n"); 396#else 397 else 398 printf("no 387 emulator in kernel!\n"); 399#endif 400 } 401 npxinit(__INITIAL_NPXCW__); 402 403#if defined(I586_CPU) 404 if (cpu_class == CPUCLASS_586 && npx_ex16) { 405 if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BCOPY)) { 406 bcopy_vector = i586_bcopy; 407 ovbcopy_vector = i586_bcopy; 408 } 409 if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_BZERO)) 410 bzero = i586_bzero; 411 if (!(dvp->id_flags & NPX_DISABLE_I586_OPTIMIZED_COPYIO)) { 412 copyin_vector = i586_copyin; 413 copyout_vector = i586_copyout; 414 } 415 } 416#endif 417 418 return (1); /* XXX unused */ 419} 420 421/* 422 * Initialize floating point unit. 423 */ 424void 425npxinit(control) 426 u_short control; 427{ 428 struct save87 dummy; 429 430 if (!npx_exists) 431 return; 432 /* 433 * fninit has the same h/w bugs as fnsave. Use the detoxified 434 * fnsave to throw away any junk in the fpu. npxsave() initializes 435 * the fpu and sets npxproc = NULL as important side effects. 436 */ 437 npxsave(&dummy); 438 stop_emulating(); 439 fldcw(&control); 440 if (curpcb != NULL) 441 fnsave(&curpcb->pcb_savefpu); 442 start_emulating(); 443} 444 445/* 446 * Free coprocessor (if we have it). 447 */ 448void 449npxexit(p) 450 struct proc *p; 451{ 452 453 if (p == npxproc) 454 npxsave(&curpcb->pcb_savefpu); 455#ifdef NPX_DEBUG 456 if (npx_exists) { 457 u_int masked_exceptions; 458 459 masked_exceptions = curpcb->pcb_savefpu.sv_env.en_cw 460 & curpcb->pcb_savefpu.sv_env.en_sw & 0x7f; 461 /* 462 * Log exceptions that would have trapped with the old 463 * control word (overflow, divide by 0, and invalid operand). 
464 */ 465 if (masked_exceptions & 0x0d) 466 log(LOG_ERR, 467 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", 468 p->p_pid, p->p_comm, masked_exceptions); 469 } 470#endif 471} 472 473/* 474 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 475 * 476 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now 477 * depend on longjmp() restoring a usable state. Restoring the state 478 * or examining it might fail if we didn't clear exceptions. 479 * 480 * XXX there is no standard way to tell SIGFPE handlers about the error 481 * state. The old interface: 482 * 483 * void handler(int sig, int code, struct sigcontext *scp); 484 * 485 * is broken because it is non-ANSI and because the FP state is not in 486 * struct sigcontext. 487 * 488 * XXX the FP state is not preserved across signal handlers. So signal 489 * handlers cannot afford to do FP unless they preserve the state or 490 * longjmp() out. Both preserving the state and longjmp()ing may be 491 * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable 492 * solution for signals other than SIGFPE. 493 */ 494void 495npxintr(unit) 496 int unit; 497{ 498 int code; 499 struct intrframe *frame; 500 501 if (npxproc == NULL || !npx_exists) { 502 printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", 503 npxproc, curproc, npx_exists); 504 panic("npxintr from nowhere"); 505 } 506 if (npxproc != curproc) { 507 printf("npxintr: npxproc = %p, curproc = %p, npx_exists = %d\n", 508 npxproc, curproc, npx_exists); 509 panic("npxintr from non-current process"); 510 } 511 512 outb(0xf0, 0); 513 fnstsw(&curpcb->pcb_savefpu.sv_ex_sw); 514 fnclex(); 515 fnop(); 516 517 /* 518 * Pass exception to process. 
519 */ 520 frame = (struct intrframe *)&unit; /* XXX */ 521 if ((ISPL(frame->if_cs) == SEL_UPL) || (frame->if_eflags & PSL_VM)) { 522 /* 523 * Interrupt is essentially a trap, so we can afford to call 524 * the SIGFPE handler (if any) as soon as the interrupt 525 * returns. 526 * 527 * XXX little or nothing is gained from this, and plenty is 528 * lost - the interrupt frame has to contain the trap frame 529 * (this is otherwise only necessary for the rescheduling trap 530 * in doreti, and the frame for that could easily be set up 531 * just before it is used). 532 */ 533 curproc->p_md.md_regs = (struct trapframe *)&frame->if_es; 534#ifdef notyet 535 /* 536 * Encode the appropriate code for detailed information on 537 * this exception. 538 */ 539 code = XXX_ENCODE(curpcb->pcb_savefpu.sv_ex_sw); 540#else 541 code = 0; /* XXX */ 542#endif 543 trapsignal(curproc, SIGFPE, code); 544 } else { 545 /* 546 * Nested interrupt. These losers occur when: 547 * o an IRQ13 is bogusly generated at a bogus time, e.g.: 548 * o immediately after an fnsave or frstor of an 549 * error state. 550 * o a couple of 386 instructions after 551 * "fstpl _memvar" causes a stack overflow. 552 * These are especially nasty when combined with a 553 * trace trap. 554 * o an IRQ13 occurs at the same time as another higher- 555 * priority interrupt. 556 * 557 * Treat them like a true async interrupt. 558 */ 559 psignal(curproc, SIGFPE); 560 } 561} 562 563/* 564 * Implement device not available (DNA) exception 565 * 566 * It would be better to switch FP context here (if curproc != npxproc) 567 * and not necessarily for every context switch, but it is too hard to 568 * access foreign pcb's. 569 */ 570int 571npxdna() 572{ 573 if (!npx_exists) 574 return (0); 575 if (npxproc != NULL) { 576 printf("npxdna: npxproc = %p, curproc = %p\n", 577 npxproc, curproc); 578 panic("npxdna"); 579 } 580 stop_emulating(); 581 /* 582 * Record new context early in case frstor causes an IRQ13. 
583 */ 584 npxproc = curproc; 585 curpcb->pcb_savefpu.sv_ex_sw = 0; 586 /* 587 * The following frstor may cause an IRQ13 when the state being 588 * restored has a pending error. The error will appear to have been 589 * triggered by the current (npx) user instruction even when that 590 * instruction is a no-wait instruction that should not trigger an 591 * error (e.g., fnclex). On at least one 486 system all of the 592 * no-wait instructions are broken the same as frstor, so our 593 * treatment does not amplify the breakage. On at least one 594 * 386/Cyrix 387 system, fnclex works correctly while frstor and 595 * fnsave are broken, so our treatment breaks fnclex if it is the 596 * first FPU instruction after a context switch. 597 */ 598 frstor(&curpcb->pcb_savefpu); 599 600 return (1); 601} 602 603/* 604 * Wrapper for fnsave instruction to handle h/w bugs. If there is an error 605 * pending, then fnsave generates a bogus IRQ13 on some systems. Force 606 * any IRQ13 to be handled immediately, and then ignore it. This routine is 607 * often called at splhigh so it must not use many system services. In 608 * particular, it's much easier to install a special handler than to 609 * guarantee that it's safe to use npxintr() and its supporting code. 
610 */ 611void 612npxsave(addr) 613 struct save87 *addr; 614{ 615#ifdef SMP 616 617 stop_emulating(); 618 fnsave(addr); 619 /* fnop(); */ 620 start_emulating(); 621 npxproc = NULL; 622 623#else /* SMP */ 624 625 u_char icu1_mask; 626 u_char icu2_mask; 627 u_char old_icu1_mask; 628 u_char old_icu2_mask; 629 struct gate_descriptor save_idt_npxintr; 630 631 disable_intr(); 632 old_icu1_mask = inb(IO_ICU1 + 1); 633 old_icu2_mask = inb(IO_ICU2 + 1); 634 save_idt_npxintr = idt[npx_intrno]; 635 outb(IO_ICU1 + 1, old_icu1_mask & ~(IRQ_SLAVE | npx0_imask)); 636 outb(IO_ICU2 + 1, old_icu2_mask & ~(npx0_imask >> 8)); 637 idt[npx_intrno] = npx_idt_probeintr; 638 enable_intr(); 639 stop_emulating(); 640 fnsave(addr); 641 fnop(); 642 start_emulating(); 643 npxproc = NULL; 644 disable_intr(); 645 icu1_mask = inb(IO_ICU1 + 1); /* masks may have changed */ 646 icu2_mask = inb(IO_ICU2 + 1); 647 outb(IO_ICU1 + 1, 648 (icu1_mask & ~npx0_imask) | (old_icu1_mask & npx0_imask)); 649 outb(IO_ICU2 + 1, 650 (icu2_mask & ~(npx0_imask >> 8)) 651 | (old_icu2_mask & (npx0_imask >> 8))); 652 idt[npx_intrno] = save_idt_npxintr; 653 enable_intr(); /* back to usual state */ 654 655#endif /* SMP */ 656} 657 658#endif /* NNPX > 0 */ 659