1/* $NetBSD: npx.c,v 1.142 2011/08/11 18:05:11 cherry Exp $ */ 2 3/*- 4 * Copyright (c) 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software developed for The NetBSD Foundation 8 * by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/*- 33 * Copyright (c) 1991 The Regents of the University of California. 34 * All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)npx.c 7.2 (Berkeley) 5/12/91 61 */ 62 63/*- 64 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum. All rights reserved. 65 * Copyright (c) 1990 William Jolitz. 66 * 67 * Redistribution and use in source and binary forms, with or without 68 * modification, are permitted provided that the following conditions 69 * are met: 70 * 1. Redistributions of source code must retain the above copyright 71 * notice, this list of conditions and the following disclaimer. 72 * 2. Redistributions in binary form must reproduce the above copyright 73 * notice, this list of conditions and the following disclaimer in the 74 * documentation and/or other materials provided with the distribution. 75 * 3. All advertising materials mentioning features or use of this software 76 * must display the following acknowledgement: 77 * This product includes software developed by the University of 78 * California, Berkeley and its contributors. 79 * 4. Neither the name of the University nor the names of its contributors 80 * may be used to endorse or promote products derived from this software 81 * without specific prior written permission. 82 * 83 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 84 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 85 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 86 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 87 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 88 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 89 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 90 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 91 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 92 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 93 * SUCH DAMAGE. 94 * 95 * @(#)npx.c 7.2 (Berkeley) 5/12/91 96 */ 97 98#include <sys/cdefs.h> 99__KERNEL_RCSID(0, "$NetBSD: npx.c,v 1.142 2011/08/11 18:05:11 cherry Exp $"); 100 101#if 0 102#define IPRINTF(x) printf x 103#else 104#define IPRINTF(x) 105#endif 106 107#include "opt_multiprocessor.h" 108#include "opt_xen.h" 109 110#include <sys/param.h> 111#include <sys/systm.h> 112#include <sys/conf.h> 113#include <sys/cpu.h> 114#include <sys/file.h> 115#include <sys/proc.h> 116#include <sys/ioctl.h> 117#include <sys/device.h> 118#include <sys/vmmeter.h> 119#include <sys/kernel.h> 120#include <sys/bus.h> 121#include <sys/cpu.h> 122#include <sys/intr.h> 123 124#include <uvm/uvm_extern.h> 125 126#include <machine/cpufunc.h> 127#include <machine/cpuvar.h> 128#include <machine/pcb.h> 129#include <machine/trap.h> 130#include <machine/specialreg.h> 131#include <machine/pio.h> 132#include <machine/i8259.h> 133 134#include <dev/isa/isareg.h> 135#include <dev/isa/isavar.h> 136 137#include <i386/isa/npxvar.h> 138 139/* 140 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 141 * 142 * We do lazy initialization and switching using the TS bit in cr0 and the 143 * MDL_USEDFPU bit in mdlwp. 144 * 145 * DNA exceptions are handled like this: 146 * 147 * 1) If there is no NPX, return and go to the emulator. 148 * 2) If someone else has used the NPX, save its state into that process's PCB. 149 * 3a) If MDL_USEDFPU is not set, set it and initialize the NPX. 150 * 3b) Otherwise, reload the process's previous NPX state. 151 * 152 * When a process is created or exec()s, its saved cr0 image has the TS bit 153 * set and the MDL_USEDFPU bit clear. The MDL_USEDFPU bit is set when the 154 * process first gets a DNA and the NPX is initialized. The TS bit is turned 155 * off when the NPX is used, and turned on again later when the process's NPX 156 * state is saved. 157 */ 158 159static int x86fpflags_to_ksiginfo(uint32_t flags); 160static int npxdna(struct cpu_info *); 161 162#ifdef XEN 163#define clts() HYPERVISOR_fpu_taskswitch(0) 164#define stts() HYPERVISOR_fpu_taskswitch(1) 165#endif 166 167static enum npx_type npx_type; 168volatile u_int npx_intrs_while_probing; 169volatile u_int npx_traps_while_probing; 170 171extern int i386_fpu_present; 172extern int i386_fpu_exception; 173extern int i386_fpu_fdivbug; 174 175struct npx_softc *npx_softc; 176 177static inline void 178fpu_save(union savefpu *addr) 179{ 180 if (i386_use_fxsave) 181 { 182 fxsave(&addr->sv_xmm); 183 184 /* FXSAVE doesn't FNINIT like FNSAVE does -- so do it here. */ 185 fninit(); 186 } else 187 fnsave(&addr->sv_87); 188} 189 190static int 191npxdna_empty(struct cpu_info *ci) 192{ 193 194 panic("npxdna vector not initialized"); 195 return 0; 196} 197 198 199int (*npxdna_func)(struct cpu_info *) = npxdna_empty; 200 201#ifndef XEN 202/* 203 * This calls i8259_* directly, but currently we can count on systems 204 * having a i8259 compatible setup all the time. Maybe have to change 205 * that in the future. 206 */ 207enum npx_type 208npxprobe1(bus_space_tag_t iot, bus_space_handle_t ioh, int irq) 209{ 210 struct gate_descriptor save_idt_npxintr; 211 struct gate_descriptor save_idt_npxtrap; 212 enum npx_type rv = NPX_NONE; 213 u_long save_eflags; 214 int control; 215 int status; 216 unsigned irqmask; 217 218 if (cpu_feature[0] & CPUID_FPU) { 219 i386_fpu_exception = 1; 220 return NPX_CPUID; 221 } 222 save_eflags = x86_read_psl(); 223 x86_disable_intr(); 224 save_idt_npxintr = idt[NRSVIDT + irq]; 225 save_idt_npxtrap = idt[16]; 226 setgate(&idt[NRSVIDT + irq], probeintr, 0, SDT_SYS386IGT, SEL_KPL, 227 GSEL(GCODE_SEL, SEL_KPL)); 228 setgate(&idt[16], probetrap, 0, SDT_SYS386TGT, SEL_KPL, 229 GSEL(GCODE_SEL, SEL_KPL)); 230 231 irqmask = i8259_setmask(~((1 << IRQ_SLAVE) | (1 << irq))); 232 233 /* 234 * Partially reset the coprocessor, if any. Some BIOS's don't reset 235 * it after a warm boot. 236 */ 237 /* full reset on some systems, NOP on others */ 238 bus_space_write_1(iot, ioh, 1, 0); 239 delay(1000); 240 /* clear BUSY# latch */ 241 bus_space_write_1(iot, ioh, 0, 0); 242 243 /* 244 * We set CR0 in locore to trap all ESC and WAIT instructions. 245 * We have to turn off the CR0_EM bit temporarily while probing. 246 */ 247 lcr0(rcr0() & ~(CR0_EM|CR0_TS)); 248 x86_enable_intr(); 249 250 /* 251 * Finish resetting the coprocessor, if any. If there is an error 252 * pending, then we may get a bogus IRQ13, but probeintr() will handle 253 * it OK. Bogus halts have never been observed, but we enabled 254 * IRQ13 and cleared the BUSY# latch early to handle them anyway. 255 */ 256 fninit(); 257 delay(1000); /* wait for any IRQ13 (fwait might hang) */ 258 259 /* 260 * Check for a status of mostly zero. 261 */ 262 status = 0x5a5a; 263 fnstsw(&status); 264 if ((status & 0xb8ff) == 0) { 265 /* 266 * Good, now check for a proper control word. 267 */ 268 control = 0x5a5a; 269 fnstcw(&control); 270 if ((control & 0x1f3f) == 0x033f) { 271 /* 272 * We have an npx, now divide by 0 to see if exception 273 * 16 works. 274 */ 275 control &= ~(1 << 2); /* enable divide by 0 trap */ 276 fldcw(&control); 277 npx_traps_while_probing = npx_intrs_while_probing = 0; 278 fp_divide_by_0(); 279 if (npx_traps_while_probing != 0) { 280 /* 281 * Good, exception 16 works. 282 */ 283 rv = NPX_EXCEPTION; 284 i386_fpu_exception = 1; 285 } else if (npx_intrs_while_probing != 0) { 286 /* 287 * Bad, we are stuck with IRQ13. 288 */ 289 rv = NPX_INTERRUPT; 290 } else { 291 /* 292 * Worse, even IRQ13 is broken. Use emulator. 293 */ 294 rv = NPX_BROKEN; 295 } 296 } 297 } 298 299 x86_disable_intr(); 300 lcr0(rcr0() | (CR0_EM|CR0_TS)); 301 302 irqmask = i8259_setmask(irqmask); 303 304 idt[NRSVIDT + irq] = save_idt_npxintr; 305 306 idt[16] = save_idt_npxtrap; 307 x86_write_psl(save_eflags); 308 309 return (rv); 310} 311 312void npxinit(struct cpu_info *ci) 313{ 314 lcr0(rcr0() & ~(CR0_EM|CR0_TS)); 315 fninit(); 316 if (npx586bug1(4195835, 3145727) != 0) { 317 i386_fpu_fdivbug = 1; 318 aprint_normal_dev(ci->ci_dev, 319 "WARNING: Pentium FDIV bug detected!\n"); 320 } 321 lcr0(rcr0() | (CR0_TS)); 322} 323#endif 324 325/* 326 * Common attach routine. 327 */ 328void 329npxattach(struct npx_softc *sc) 330{ 331 332 npx_softc = sc; 333 npx_type = sc->sc_type; 334 335#ifndef XEN 336 npxinit(&cpu_info_primary); 337#endif 338 i386_fpu_present = 1; 339 npxdna_func = npxdna; 340 341 if (!pmf_device_register(sc->sc_dev, NULL, NULL)) 342 aprint_error_dev(sc->sc_dev, "couldn't establish power handler\n"); 343} 344 345int 346npxdetach(device_t self, int flags) 347{ 348 struct npx_softc *sc = device_private(self); 349 350 if (sc->sc_type == NPX_INTERRUPT) 351 return EBUSY; 352 353 pmf_device_deregister(self); 354 355 return 0; 356} 357 358/* 359 * Record the FPU state and reinitialize it all except for the control word. 360 * Then generate a SIGFPE. 361 * 362 * Reinitializing the state allows naive SIGFPE handlers to longjmp without 363 * doing any fixups. 364 * 365 * XXX there is currently no way to pass the full error state to signal 366 * handlers, and if this is a nested interrupt there is no way to pass even 367 * a status code! So there is no way to have a non-naive SIGFPE handler. At 368 * best a handler could do an fninit followed by an fldcw of a static value. 369 * fnclex would be of little use because it would leave junk on the FPU stack. 370 * Returning from the handler would be even less safe than usual because 371 * IRQ13 exception handling makes exceptions even less precise than usual. 372 */ 373int 374npxintr(void *arg, struct intrframe *frame) 375{ 376 struct cpu_info *ci = curcpu(); 377 struct lwp *l = ci->ci_fpcurlwp; 378 union savefpu *addr; 379 struct npx_softc *sc; 380 struct pcb *pcb; 381 ksiginfo_t ksi; 382 383 sc = npx_softc; 384 385 kpreempt_disable(); 386#ifndef XEN 387 KASSERT((x86_read_psl() & PSL_I) == 0); 388 x86_enable_intr(); 389#endif 390 391 curcpu()->ci_data.cpu_ntrap++; 392 IPRINTF(("%s: fp intr\n", device_xname(ci->ci_dev))); 393 394#ifndef XEN 395 /* 396 * Clear the interrupt latch. 397 */ 398 if (sc->sc_type == NPX_INTERRUPT) 399 bus_space_write_1(sc->sc_iot, sc->sc_ioh, 0, 0); 400#endif 401 402 /* 403 * If we're saving, ignore the interrupt. The FPU will generate 404 * another one when we restore the state later. 405 */ 406 if (ci->ci_fpsaving) { 407 kpreempt_enable(); 408 return (1); 409 } 410 411 if (l == NULL || npx_type == NPX_NONE) { 412 printf("npxintr: l = %p, curproc = %p, npx_type = %d\n", 413 l, curproc, npx_type); 414 printf("npxintr: came from nowhere"); 415 kpreempt_enable(); 416 return 1; 417 } 418 419 /* 420 * At this point, fpcurlwp should be curlwp. If it wasn't, the TS 421 * bit should be set, and we should have gotten a DNA exception. 422 */ 423 KASSERT(l == curlwp); 424 pcb = lwp_getpcb(l); 425 426 /* 427 * Find the address of fpcurproc's saved FPU state. (Given the 428 * invariant above, this is always the one in curpcb.) 429 */ 430 addr = &pcb->pcb_savefpu; 431 432 /* 433 * Save state. This does an implied fninit. It had better not halt 434 * the CPU or we'll hang. 435 */ 436 fpu_save(addr); 437 fwait(); 438 if (i386_use_fxsave) { 439 fldcw(&addr->sv_xmm.sv_env.en_cw); 440 /* 441 * FNINIT doesn't affect MXCSR or the XMM registers; 442 * no need to re-load MXCSR here. 443 */ 444 } else 445 fldcw(&addr->sv_87.sv_env.en_cw); 446 fwait(); 447 /* 448 * Remember the exception status word and tag word. The current 449 * (almost fninit'ed) fpu state is in the fpu and the exception 450 * state just saved will soon be junk. However, the implied fninit 451 * doesn't change the error pointers or register contents, and we 452 * preserved the control word and will copy the status and tag 453 * words, so the complete exception state can be recovered. 454 */ 455 if (i386_use_fxsave) { 456 addr->sv_xmm.sv_ex_sw = addr->sv_xmm.sv_env.en_sw; 457 addr->sv_xmm.sv_ex_tw = addr->sv_xmm.sv_env.en_tw; 458 } else { 459 addr->sv_87.sv_ex_sw = addr->sv_87.sv_env.en_sw; 460 addr->sv_87.sv_ex_tw = addr->sv_87.sv_env.en_tw; 461 } 462 /* 463 * Pass exception to process. 464 */ 465 if (USERMODE(frame->if_cs, frame->if_eflags)) { 466 /* 467 * Interrupt is essentially a trap, so we can afford to call 468 * the SIGFPE handler (if any) as soon as the interrupt 469 * returns. 470 * 471 * XXX little or nothing is gained from this, and plenty is 472 * lost - the interrupt frame has to contain the trap frame 473 * (this is otherwise only necessary for the rescheduling trap 474 * in doreti, and the frame for that could easily be set up 475 * just before it is used). 476 */ 477 l->l_md.md_regs = (struct trapframe *)&frame->if_gs; 478 479 KSI_INIT_TRAP(&ksi); 480 ksi.ksi_signo = SIGFPE; 481 ksi.ksi_addr = (void *)frame->if_eip; 482 483 /* 484 * Encode the appropriate code for detailed information on 485 * this exception. 486 */ 487 488 if (i386_use_fxsave) { 489 ksi.ksi_code = 490 x86fpflags_to_ksiginfo(addr->sv_xmm.sv_ex_sw); 491 ksi.ksi_trap = (int)addr->sv_xmm.sv_ex_sw; 492 } else { 493 ksi.ksi_code = 494 x86fpflags_to_ksiginfo(addr->sv_87.sv_ex_sw); 495 ksi.ksi_trap = (int)addr->sv_87.sv_ex_sw; 496 } 497 498 trapsignal(l, &ksi); 499 } else { 500 /* 501 * This is a nested interrupt. This should only happen when 502 * an IRQ13 occurs at the same time as a higher-priority 503 * interrupt. 504 * 505 * XXX 506 * Currently, we treat this like an asynchronous interrupt, but 507 * this has disadvantages. 508 */ 509 mutex_enter(proc_lock); 510 psignal(l->l_proc, SIGFPE); 511 mutex_exit(proc_lock); 512 } 513 514 kpreempt_enable(); 515 return (1); 516} 517 518/* map x86 fp flags to ksiginfo fp codes */ 519/* see table 8-4 of the IA-32 Intel Architecture */ 520/* Software Developer's Manual, Volume 1 */ 521/* XXX punting on the stack fault with FLTINV */ 522static int 523x86fpflags_to_ksiginfo(uint32_t flags) 524{ 525 int i; 526 static int x86fp_ksiginfo_table[] = { 527 FPE_FLTINV, /* bit 0 - invalid operation */ 528 FPE_FLTRES, /* bit 1 - denormal operand */ 529 FPE_FLTDIV, /* bit 2 - divide by zero */ 530 FPE_FLTOVF, /* bit 3 - fp overflow */ 531 FPE_FLTUND, /* bit 4 - fp underflow */ 532 FPE_FLTRES, /* bit 5 - fp precision */ 533 FPE_FLTINV, /* bit 6 - stack fault */ 534 }; 535 536 for(i=0;i < sizeof(x86fp_ksiginfo_table)/sizeof(int); i++) { 537 if (flags & (1 << i)) 538 return(x86fp_ksiginfo_table[i]); 539 } 540 /* punt if flags not set */ 541 return(0); 542} 543 544/* 545 * Implement device not available (DNA) exception 546 * 547 * If we were the last lwp to use the FPU, we can simply return. 548 * Otherwise, we save the previous state, if necessary, and restore 549 * our last saved state. 550 */ 551static int 552npxdna(struct cpu_info *ci) 553{ 554 struct lwp *l, *fl; 555 struct pcb *pcb; 556 int s; 557 558 if (ci->ci_fpsaving) { 559 /* Recursive trap. */ 560 return 1; 561 } 562 563 /* Lock out IPIs and disable preemption. */ 564 s = splhigh(); 565#ifndef XEN 566 x86_enable_intr(); 567#endif 568 /* Save state on current CPU. */ 569 l = ci->ci_curlwp; 570 pcb = lwp_getpcb(l); 571 572 fl = ci->ci_fpcurlwp; 573 if (fl != NULL) { 574 /* 575 * It seems we can get here on Xen even if we didn't 576 * switch lwp. In this case do nothing 577 */ 578 if (fl == l) { 579 KASSERT(pcb->pcb_fpcpu == ci); 580 ci->ci_fpused = 1; 581 clts(); 582 splx(s); 583 return 1; 584 } 585 KASSERT(fl != l); 586 npxsave_cpu(true); 587 KASSERT(ci->ci_fpcurlwp == NULL); 588 } 589 590 /* Save our state if on a remote CPU. */ 591 if (pcb->pcb_fpcpu != NULL) { 592 /* Explicitly disable preemption before dropping spl. */ 593 KPREEMPT_DISABLE(l); 594 splx(s); 595 npxsave_lwp(l, true); 596 KASSERT(pcb->pcb_fpcpu == NULL); 597 s = splhigh(); 598 KPREEMPT_ENABLE(l); 599 } 600 601 /* 602 * Restore state on this CPU, or initialize. Ensure that 603 * the entire update is atomic with respect to FPU-sync IPIs. 604 */ 605 clts(); 606 ci->ci_fpcurlwp = l; 607 pcb->pcb_fpcpu = ci; 608 ci->ci_fpused = 1; 609 610 if ((l->l_md.md_flags & MDL_USEDFPU) == 0) { 611 fninit(); 612 if (i386_use_fxsave) { 613 fldcw(&pcb->pcb_savefpu. 614 sv_xmm.sv_env.en_cw); 615 } else { 616 fldcw(&pcb->pcb_savefpu. 617 sv_87.sv_env.en_cw); 618 } 619 l->l_md.md_flags |= MDL_USEDFPU; 620 } else if (i386_use_fxsave) { 621 /* 622 * AMD FPU's do not restore FIP, FDP, and FOP on fxrstor, 623 * leaking other process's execution history. Clear them 624 * manually. 625 */ 626 static const double zero = 0.0; 627 int status; 628 /* 629 * Clear the ES bit in the x87 status word if it is currently 630 * set, in order to avoid causing a fault in the upcoming load. 631 */ 632 fnstsw(&status); 633 if (status & 0x80) 634 fnclex(); 635 /* 636 * Load the dummy variable into the x87 stack. This mangles 637 * the x87 stack, but we don't care since we're about to call 638 * fxrstor() anyway. 639 */ 640 fldummy(&zero); 641 fxrstor(&pcb->pcb_savefpu.sv_xmm); 642 } else { 643 frstor(&pcb->pcb_savefpu.sv_87); 644 } 645 646 KASSERT(ci == curcpu()); 647 splx(s); 648 return 1; 649} 650 651/* 652 * Save current CPU's FPU state. Must be called at IPL_HIGH. 653 */ 654void 655npxsave_cpu(bool save) 656{ 657 struct cpu_info *ci; 658 struct lwp *l; 659 struct pcb *pcb; 660 661 KASSERT(curcpu()->ci_ilevel == IPL_HIGH); 662 663 ci = curcpu(); 664 l = ci->ci_fpcurlwp; 665 if (l == NULL) 666 return; 667 668 pcb = lwp_getpcb(l); 669 670 if (save) { 671 /* 672 * Set ci->ci_fpsaving, so that any pending exception will 673 * be thrown away. It will be caught again if/when the 674 * FPU state is restored. 675 */ 676 KASSERT(ci->ci_fpsaving == 0); 677 clts(); 678 ci->ci_fpsaving = 1; 679 if (i386_use_fxsave) { 680 fxsave(&pcb->pcb_savefpu.sv_xmm); 681 } else { 682 fnsave(&pcb->pcb_savefpu.sv_87); 683 } 684 ci->ci_fpsaving = 0; 685 } 686 687 stts(); 688 pcb->pcb_fpcpu = NULL; 689 ci->ci_fpcurlwp = NULL; 690 ci->ci_fpused = 1; 691} 692 693/* 694 * Save l's FPU state, which may be on this processor or another processor. 695 * It may take some time, so we avoid disabling preemption where possible. 696 * Caller must know that the target LWP is stopped, otherwise this routine 697 * may race against it. 698 */ 699void 700npxsave_lwp(struct lwp *l, bool save) 701{ 702 struct cpu_info *oci; 703 struct pcb *pcb; 704 int s, spins, ticks; 705 706 spins = 0; 707 ticks = hardclock_ticks; 708 for (;;) { 709 s = splhigh(); 710 pcb = lwp_getpcb(l); 711 oci = pcb->pcb_fpcpu; 712 if (oci == NULL) { 713 splx(s); 714 break; 715 } 716 if (oci == curcpu()) { 717 KASSERT(oci->ci_fpcurlwp == l); 718 npxsave_cpu(save); 719 splx(s); 720 break; 721 } 722 splx(s); 723#ifdef XEN 724 if (xen_send_ipi(oci, XEN_IPI_SYNCH_FPU) != 0) { 725 panic("xen_send_ipi(%s, XEN_IPI_SYNCH_FPU) failed.", 726 cpu_name(oci)); 727 } 728#else /* XEN */ 729 x86_send_ipi(oci, X86_IPI_SYNCH_FPU); 730#endif 731 while (pcb->pcb_fpcpu == oci && 732 ticks == hardclock_ticks) { 733 x86_pause(); 734 spins++; 735 } 736 if (spins > 100000000) { 737 panic("npxsave_lwp: did not"); 738 } 739 } 740 741 if (!save) { 742 /* Ensure we restart with a clean slate. */ 743 l->l_md.md_flags &= ~MDL_USEDFPU; 744 } 745} 746 747/* 748 * The following mechanism is used to ensure that the FPE_... value 749 * that is passed as a trapcode to the signal handler of the user 750 * process does not have more than one bit set. 751 * 752 * Multiple bits may be set if the user process modifies the control 753 * word while a status word bit is already set. While this is a sign 754 * of bad coding, we have no choise than to narrow them down to one 755 * bit, since we must not send a trapcode that is not exactly one of 756 * the FPE_ macros. 757 * 758 * The mechanism has a static table with 127 entries. Each combination 759 * of the 7 FPU status word exception bits directly translates to a 760 * position in this table, where a single FPE_... value is stored. 761 * This FPE_... value stored there is considered the "most important" 762 * of the exception bits and will be sent as the signal code. The 763 * precedence of the bits is based upon Intel Document "Numerical 764 * Applications", Chapter "Special Computational Situations". 765 * 766 * The macro to choose one of these values does these steps: 1) Throw 767 * away status word bits that cannot be masked. 2) Throw away the bits 768 * currently masked in the control word, assuming the user isn't 769 * interested in them anymore. 3) Reinsert status word bit 7 (stack 770 * fault) if it is set, which cannot be masked but must be presered. 771 * 4) Use the remaining bits to point into the trapcode table. 772 * 773 * The 6 maskable bits in order of their preference, as stated in the 774 * above referenced Intel manual: 775 * 1 Invalid operation (FP_X_INV) 776 * 1a Stack underflow 777 * 1b Stack overflow 778 * 1c Operand of unsupported format 779 * 1d SNaN operand. 780 * 2 QNaN operand (not an exception, irrelavant here) 781 * 3 Any other invalid-operation not mentioned above or zero divide 782 * (FP_X_INV, FP_X_DZ) 783 * 4 Denormal operand (FP_X_DNML) 784 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 785 * 6 Inexact result (FP_X_IMP) 786 */ 787static const uint8_t fpetable[128] = { 788 0, 789 FPE_FLTINV, /* 1 - INV */ 790 FPE_FLTUND, /* 2 - DNML */ 791 FPE_FLTINV, /* 3 - INV | DNML */ 792 FPE_FLTDIV, /* 4 - DZ */ 793 FPE_FLTINV, /* 5 - INV | DZ */ 794 FPE_FLTDIV, /* 6 - DNML | DZ */ 795 FPE_FLTINV, /* 7 - INV | DNML | DZ */ 796 FPE_FLTOVF, /* 8 - OFL */ 797 FPE_FLTINV, /* 9 - INV | OFL */ 798 FPE_FLTUND, /* A - DNML | OFL */ 799 FPE_FLTINV, /* B - INV | DNML | OFL */ 800 FPE_FLTDIV, /* C - DZ | OFL */ 801 FPE_FLTINV, /* D - INV | DZ | OFL */ 802 FPE_FLTDIV, /* E - DNML | DZ | OFL */ 803 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 804 FPE_FLTUND, /* 10 - UFL */ 805 FPE_FLTINV, /* 11 - INV | UFL */ 806 FPE_FLTUND, /* 12 - DNML | UFL */ 807 FPE_FLTINV, /* 13 - INV | DNML | UFL */ 808 FPE_FLTDIV, /* 14 - DZ | UFL */ 809 FPE_FLTINV, /* 15 - INV | DZ | UFL */ 810 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 811 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 812 FPE_FLTOVF, /* 18 - OFL | UFL */ 813 FPE_FLTINV, /* 19 - INV | OFL | UFL */ 814 FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 815 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 816 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 817 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 818 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 819 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 820 FPE_FLTRES, /* 20 - IMP */ 821 FPE_FLTINV, /* 21 - INV | IMP */ 822 FPE_FLTUND, /* 22 - DNML | IMP */ 823 FPE_FLTINV, /* 23 - INV | DNML | IMP */ 824 FPE_FLTDIV, /* 24 - DZ | IMP */ 825 FPE_FLTINV, /* 25 - INV | DZ | IMP */ 826 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 827 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 828 FPE_FLTOVF, /* 28 - OFL | IMP */ 829 FPE_FLTINV, /* 29 - INV | OFL | IMP */ 830 FPE_FLTUND, /* 2A - DNML | OFL | IMP */ 831 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 832 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 833 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 834 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 835 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 836 FPE_FLTUND, /* 30 - UFL | IMP */ 837 FPE_FLTINV, /* 31 - INV | UFL | IMP */ 838 FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 839 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 840 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 841 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ 842 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 843 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 844 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 845 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 846 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 847 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 848 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 849 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 850 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 851 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 852 FPE_FLTSUB, /* 40 - STK */ 853 FPE_FLTSUB, /* 41 - INV | STK */ 854 FPE_FLTUND, /* 42 - DNML | STK */ 855 FPE_FLTSUB, /* 43 - INV | DNML | STK */ 856 FPE_FLTDIV, /* 44 - DZ | STK */ 857 FPE_FLTSUB, /* 45 - INV | DZ | STK */ 858 FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 859 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 860 FPE_FLTOVF, /* 48 - OFL | STK */ 861 FPE_FLTSUB, /* 49 - INV | OFL | STK */ 862 FPE_FLTUND, /* 4A - DNML | OFL | STK */ 863 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ 864 FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 865 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 866 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 867 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 868 FPE_FLTUND, /* 50 - UFL | STK */ 869 FPE_FLTSUB, /* 51 - INV | UFL | STK */ 870 FPE_FLTUND, /* 52 - DNML | UFL | STK */ 871 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 872 FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 873 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 874 FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 875 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 876 FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 877 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 878 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 879 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 880 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 881 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 882 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 883 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 884 FPE_FLTRES, /* 60 - IMP | STK */ 885 FPE_FLTSUB, /* 61 - INV | IMP | STK */ 886 FPE_FLTUND, /* 62 - DNML | IMP | STK */ 887 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 888 FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 889 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 890 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 891 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 892 FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 893 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 894 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 895 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 896 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 897 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ 898 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 899 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 900 FPE_FLTUND, /* 70 - UFL | IMP | STK */ 901 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 902 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 903 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 904 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 905 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 906 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 907 FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ 908 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 909 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 910 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 911 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 912 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 913 FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 914 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 915 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 916}; 917 918#define GET_FPU_CW(pcb) \ 919 (i386_use_fxsave ? \ 920 pcb->pcb_savefpu.sv_xmm.sv_env.en_cw : \ 921 pcb->pcb_savefpu.sv_87.sv_env.en_cw) 922#define GET_FPU_SW(pcb) \ 923 (i386_use_fxsave ? \ 924 pcb->pcb_savefpu.sv_xmm.sv_env.en_sw : \ 925 pcb->pcb_savefpu.sv_87.sv_env.en_sw) 926 927/* 928 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 929 * 930 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now 931 * depend on longjmp() restoring a usable state. Restoring the state 932 * or examining it might fail if we didn't clear exceptions. 933 * 934 * The error code chosen will be one of the FPE_... macros. It will be 935 * sent as the second argument to old BSD-style signal handlers and as 936 * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. 937 * 938 * XXX the FP state is not preserved across signal handlers. So signal 939 * handlers cannot afford to do FP unless they preserve the state or 940 * longjmp() out. Both preserving the state and longjmp()ing may be 941 * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable 942 * solution for signals other than SIGFPE. 943 */ 944int 945npxtrap(struct lwp *l) 946{ 947 u_short control, status; 948 struct cpu_info *ci = curcpu(); 949 struct lwp *fl = ci->ci_fpcurlwp; 950 951 if (!i386_fpu_present) { 952 printf("%s: fpcurthread = %p, curthread = %p, npx_type = %d\n", 953 __func__, fl, l, npx_type); 954 panic("npxtrap from nowhere"); 955 } 956 kpreempt_disable(); 957 958 /* 959 * Interrupt handling (for another interrupt) may have pushed the 960 * state to memory. Fetch the relevant parts of the state from 961 * wherever they are. 962 */ 963 if (fl != l) { 964 struct pcb *pcb = lwp_getpcb(l); 965 control = GET_FPU_CW(pcb); 966 status = GET_FPU_SW(pcb); 967 } else { 968 fnstcw(&control); 969 fnstsw(&status); 970 } 971 972 if (fl == l) 973 fnclex(); 974 kpreempt_enable(); 975 return fpetable[status & ((~control & 0x3f) | 0x40)]; 976} 977