npx.c revision 238311
155714Skris/*- 255714Skris * Copyright (c) 1990 William Jolitz. 355714Skris * Copyright (c) 1991 The Regents of the University of California. 455714Skris * All rights reserved. 555714Skris * 655714Skris * Redistribution and use in source and binary forms, with or without 755714Skris * modification, are permitted provided that the following conditions 855714Skris * are met: 955714Skris * 1. Redistributions of source code must retain the above copyright 1055714Skris * notice, this list of conditions and the following disclaimer. 1155714Skris * 2. Redistributions in binary form must reproduce the above copyright 1255714Skris * notice, this list of conditions and the following disclaimer in the 1355714Skris * documentation and/or other materials provided with the distribution. 1455714Skris * 4. Neither the name of the University nor the names of its contributors 1555714Skris * may be used to endorse or promote products derived from this software 1655714Skris * without specific prior written permission. 1755714Skris * 1855714Skris * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 1955714Skris * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2055714Skris * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2155714Skris * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2255714Skris * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2355714Skris * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2455714Skris * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2555714Skris * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2655714Skris * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2755714Skris * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2855714Skris * SUCH DAMAGE. 
2955714Skris * 3055714Skris * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 3155714Skris */ 3255714Skris 3355714Skris#include <sys/cdefs.h> 3455714Skris__FBSDID("$FreeBSD: head/sys/i386/isa/npx.c 238311 2012-07-09 20:55:39Z jhb $"); 3555714Skris 3655714Skris#include "opt_cpu.h" 3755714Skris#include "opt_isa.h" 3855714Skris#include "opt_npx.h" 3955714Skris 4055714Skris#include <sys/param.h> 4155714Skris#include <sys/systm.h> 4255714Skris#include <sys/bus.h> 4355714Skris#include <sys/kernel.h> 4455714Skris#include <sys/lock.h> 4555714Skris#include <sys/malloc.h> 4655714Skris#include <sys/module.h> 4755714Skris#include <sys/mutex.h> 4855714Skris#include <sys/mutex.h> 4955714Skris#include <sys/proc.h> 5055714Skris#include <sys/smp.h> 5155714Skris#include <sys/sysctl.h> 5255714Skris#include <machine/bus.h> 5355714Skris#include <sys/rman.h> 5455714Skris#ifdef NPX_DEBUG 5555714Skris#include <sys/syslog.h> 5655714Skris#endif 5755714Skris#include <sys/signalvar.h> 5855714Skris 5955714Skris#include <machine/asmacros.h> 6055714Skris#include <machine/cputypes.h> 6155714Skris#include <machine/frame.h> 6255714Skris#include <machine/md_var.h> 6355714Skris#include <machine/pcb.h> 6455714Skris#include <machine/psl.h> 6555714Skris#include <machine/resource.h> 6655714Skris#include <machine/specialreg.h> 6755714Skris#include <machine/segments.h> 6855714Skris#include <machine/ucontext.h> 6955714Skris 7055714Skris#include <machine/intr_machdep.h> 71160814Ssimon#ifdef XEN 72160814Ssimon#include <machine/xen/xen-os.h> 73109998Smarkm#include <xen/hypervisor.h> 7455714Skris#endif 7555714Skris 76109998Smarkm#ifdef DEV_ISA 77109998Smarkm#include <isa/isavar.h> 78109998Smarkm#endif 7955714Skris 8055714Skris#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) 8155714Skris#define CPU_ENABLE_SSE 8255714Skris#endif 8355714Skris 8455714Skris/* 8555714Skris * 387 and 287 Numeric Coprocessor Extension (NPX) Driver. 
8655714Skris */ 8755714Skris 8855714Skris#if defined(__GNUCLIKE_ASM) && !defined(lint) 8955714Skris 9055714Skris#define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) 9155714Skris#define fnclex() __asm __volatile("fnclex") 9255714Skris#define fninit() __asm __volatile("fninit") 9355714Skris#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr))) 9455714Skris#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) 9555714Skris#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) 9655714Skris#define fp_divide_by_0() __asm __volatile( \ 9755714Skris "fldz; fld1; fdiv %st,%st(1); fnop") 9855714Skris#define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr))) 9955714Skris#ifdef CPU_ENABLE_SSE 100109998Smarkm#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) 10155714Skris#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) 10255714Skris#endif 10355714Skris#else /* !(__GNUCLIKE_ASM && !lint) */ 10455714Skris 10555714Skrisvoid fldcw(u_short cw); 10655714Skrisvoid fnclex(void); 10755714Skrisvoid fninit(void); 10855714Skrisvoid fnsave(caddr_t addr); 10955714Skrisvoid fnstcw(caddr_t addr); 11055714Skrisvoid fnstsw(caddr_t addr); 11155714Skrisvoid fp_divide_by_0(void); 11255714Skrisvoid frstor(caddr_t addr); 11355714Skris#ifdef CPU_ENABLE_SSE 11455714Skrisvoid fxsave(caddr_t addr); 11555714Skrisvoid fxrstor(caddr_t addr); 11655714Skris#endif 11755714Skris 118109998Smarkm#endif /* __GNUCLIKE_ASM && !lint */ 11955714Skris 12055714Skris#ifdef XEN 121109998Smarkm#define start_emulating() (HYPERVISOR_fpu_taskswitch(1)) 12255714Skris#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0)) 12355714Skris#else 124109998Smarkm#define start_emulating() load_cr0(rcr0() | CR0_TS) 12555714Skris#define stop_emulating() clts() 12655714Skris#endif 12755714Skris 12855714Skris#ifdef CPU_ENABLE_SSE 12955714Skris#define GET_FPU_CW(thread) \ 13055714Skris (cpu_fxsr ? 
\ 13155714Skris (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ 13255714Skris (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) 13355714Skris#define GET_FPU_SW(thread) \ 13455714Skris (cpu_fxsr ? \ 13555714Skris (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ 13655714Skris (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) 13755714Skris#define SET_FPU_CW(savefpu, value) do { \ 13855714Skris if (cpu_fxsr) \ 13955714Skris (savefpu)->sv_xmm.sv_env.en_cw = (value); \ 14055714Skris else \ 14155714Skris (savefpu)->sv_87.sv_env.en_cw = (value); \ 14255714Skris} while (0) 14355714Skris#else /* CPU_ENABLE_SSE */ 14455714Skris#define GET_FPU_CW(thread) \ 14555714Skris (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) 14655714Skris#define GET_FPU_SW(thread) \ 14755714Skris (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) 14855714Skris#define SET_FPU_CW(savefpu, value) \ 14955714Skris (savefpu)->sv_87.sv_env.en_cw = (value) 15055714Skris#endif /* CPU_ENABLE_SSE */ 151109998Smarkm 15255714Skristypedef u_char bool_t; 15355714Skris 15455714Skris#ifdef CPU_ENABLE_SSE 15555714Skrisstatic void fpu_clean_state(void); 15655714Skris#endif 15755714Skris 15855714Skrisstatic void fpusave(union savefpu *); 15955714Skrisstatic void fpurstor(union savefpu *); 16055714Skrisstatic int npx_attach(device_t dev); 16155714Skrisstatic void npx_identify(driver_t *driver, device_t parent); 16255714Skrisstatic int npx_probe(device_t dev); 163109998Smarkm 16455714Skrisint hw_float; 16555714Skris 166109998SmarkmSYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, 16755714Skris &hw_float, 0, "Floating point instructions executed in hardware"); 16855714Skris 169109998Smarkmstatic volatile u_int npx_traps_while_probing; 17055714Skrisstatic union savefpu npx_initialstate; 17155714Skris 17255714Skrisalias_for_inthand_t probetrap; 17355714Skris__asm(" \n\ 17455714Skris .text \n\ 17555714Skris .p2align 2,0x90 \n\ 17655714Skris .type " __XSTRING(CNAME(probetrap)) ",@function \n\ 17755714Skris" 
__XSTRING(CNAME(probetrap)) ":				\n\
	ss							\n\
	incl	" __XSTRING(CNAME(npx_traps_while_probing)) "	\n\
	fnclex							\n\
	iret							\n\
");

/*
 * Identify routine.  Create a connection point on our parent for probing.
 */
static void
npx_identify(driver, parent)
	driver_t *driver;
	device_t parent;
{
	device_t child;

	child = BUS_ADD_CHILD(parent, 0, "npx", 0);
	if (child == NULL)
		panic("npx_identify");
}

/*
 * Probe routine.  Set flags to tell npxattach() what to do.  Set up an
 * interrupt handler if npx needs to use interrupts.
 *
 * Sets hw_float on success; returns 0 when an FPU is present, ENXIO
 * otherwise (in which case CR0_EM|CR0_MP are set to fault on any FPU use).
 */
static int
npx_probe(device_t dev)
{
	struct gate_descriptor save_idt_npxtrap;
	u_short control, status;

	device_set_desc(dev, "math processor");

	/*
	 * Modern CPUs all have an FPU that uses the INT16 interface
	 * and provide a simple way to verify that, so handle the
	 * common case right away.
	 */
	if (cpu_feature & CPUID_FPU) {
		hw_float = 1;
		device_quiet(dev);
		return (0);
	}

	/* Point the #MF vector at the counting stub while we poke the FPU. */
	save_idt_npxtrap = idt[IDT_MF];
	setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	/*
	 * Don't trap while we're probing.
	 */
	stop_emulating();

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but npx_intr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();

	/*
	 * Don't use fwait here because it might hang.
	 * Don't use fnop here because it usually hangs if there is no FPU.
	 */
	DELAY(1000);		/* wait for any IRQ13 */
#ifdef DIAGNOSTIC
	if (npx_traps_while_probing != 0)
		printf("fninit caused %u bogus npx trap(s)\n",
		       npx_traps_while_probing);
#endif
	/*
	 * Check for a status of mostly zero.
	 */
	status = 0x5a5a;	/* poison value: fnstsw must overwrite it */
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(control);
#ifdef FPU_ERROR_BROKEN
			/*
			 * FPU error signal doesn't work on some CPU
			 * accelerator board.
			 */
			hw_float = 1;
			return (0);
#endif
			npx_traps_while_probing = 0;
			fp_divide_by_0();
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				hw_float = 1;
				goto cleanup;
			}
			device_printf(dev,
	"FPU does not use exception 16 for error reporting\n");
			goto cleanup;
		}
	}

	/*
	 * Probe failed.  Floating point simply won't work.
	 * Notify user and disable FPU/MMX/SSE instruction execution.
	 */
	device_printf(dev, "WARNING: no FPU!\n");
	__asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
	    "n" (CR0_EM | CR0_MP) : "ax");

cleanup:
	/* Restore the real #MF handler before returning. */
	idt[IDT_MF] = save_idt_npxtrap;
	return (hw_float ? 0 : ENXIO);
}

/*
 * Attach routine - announce which it is, and wire into system.
 *
 * Captures the post-fninit FPU state into npx_initialstate, which is later
 * loaded by npxdna()/npxgetregs() to give new threads a clean FPU.
 */
static int
npx_attach(device_t dev)
{

	npxinit();
	critical_enter();
	stop_emulating();
	fpusave(&npx_initialstate);
	start_emulating();
#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr) {
		/* Prefer the hardware-reported MXCSR mask; 0xFFBF is the
		 * documented default when the fxsave area reports none. */
		if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask)
			cpu_mxcsr_mask =
			    npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask;
		else
			cpu_mxcsr_mask = 0xFFBF;
		bzero(npx_initialstate.sv_xmm.sv_fp,
		    sizeof(npx_initialstate.sv_xmm.sv_fp));
		bzero(npx_initialstate.sv_xmm.sv_xmm,
		    sizeof(npx_initialstate.sv_xmm.sv_xmm));
		/* XXX might need even more zeroing. */
	} else
#endif
		bzero(npx_initialstate.sv_87.sv_ac,
		    sizeof(npx_initialstate.sv_87.sv_ac));
	critical_exit();

	return (0);
}

/*
 * Initialize floating point unit.
 */
void
npxinit(void)
{
	static union savefpu dummy;
	register_t saveintr;
	u_short control;

	if (!hw_float)
		return;
	/*
	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
	 * fnsave to throw away any junk in the fpu.  npxsave() initializes
	 * the fpu and sets fpcurthread = NULL as important side effects.
	 *
	 * It is too early for critical_enter() to work on AP.
	 */
	saveintr = intr_disable();
	npxsave(&dummy);
	stop_emulating();
#ifdef CPU_ENABLE_SSE
	/* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
	if (cpu_fxsr)
		fninit();
#endif
	control = __INITIAL_NPXCW__;
	fldcw(control);
	start_emulating();
	intr_restore(saveintr);
}

/*
 * Free coprocessor (if we have it).
 */
void
npxexit(td)
	struct thread *td;
{

	critical_enter();
	/* Only flush state if this CPU's FPU currently belongs to us. */
	if (curthread == PCPU_GET(fpcurthread))
		npxsave(PCPU_GET(curpcb)->pcb_save);
	critical_exit();
#ifdef NPX_DEBUG
	if (hw_float) {
		u_int masked_exceptions;

		masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
		/*
		 * Log exceptions that would have trapped with the old
		 * control word (overflow, divide by 0, and invalid operand).
		 */
		if (masked_exceptions & 0x0d)
			log(LOG_ERR,
	"pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
			    td->td_proc->p_pid, td->td_proc->p_comm,
			    masked_exceptions);
	}
#endif
}

/*
 * Report which mcontext FP-state format this machine saves
 * (_MC_FPFMT_XMM for fxsave, _MC_FPFMT_387 for fnsave, or NODEV).
 */
int
npxformat()
{

	if (!hw_float)
		return (_MC_FPFMT_NODEV);
#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		return (_MC_FPFMT_XMM);
#endif
	return (_MC_FPFMT_387);
}

/*
 * The following mechanism is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if the user process modifies the control
 * word while a status word bit is already set.  While this is a sign
 * of bad coding, we have no choice than to narrow them down to one
 * bit, since we must not send a trapcode that is not exactly one of
 * the FPE_ macros.
 *
 * The mechanism has a static table with 127 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 *   1  Invalid operation (FP_X_INV)
 *     1a   Stack underflow
 *     1b   Stack overflow
 *     1c   Operand of unsupported format
 *     1d   SNaN operand.
 *   2  QNaN operand (not an exception, irrelevant here)
 *   3  Any other invalid-operation not mentioned above or zero divide
 *        (FP_X_INV, FP_X_DZ)
 *   4  Denormal operand (FP_X_DNML)
 *   5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
 *   6  Inexact result (FP_X_IMP)
 */
static char fpetable[128] = {
	0,
	FPE_FLTINV,	/*  1 - INV */
	FPE_FLTUND,	/*  2 - DNML */
	FPE_FLTINV,	/*  3 - INV | DNML */
	FPE_FLTDIV,	/*  4 - DZ */
	FPE_FLTINV,	/*  5 - INV | DZ */
	FPE_FLTDIV,	/*  6 - DNML | DZ */
	FPE_FLTINV,	/*  7 - INV | DNML | DZ */
	FPE_FLTOVF,	/*  8 - OFL */
	FPE_FLTINV,	/*  9 - INV | OFL */
	FPE_FLTUND,	/*  A - DNML | OFL */
	FPE_FLTINV,	/*  B - INV | DNML | OFL */
	FPE_FLTDIV,	/*  C - DZ | OFL */
	FPE_FLTINV,	/*  D - INV | DZ | OFL */
	FPE_FLTDIV,	/*  E - DNML | DZ | OFL */
	FPE_FLTINV,	/*  F - INV | DNML | DZ | OFL */
	FPE_FLTUND,	/* 10 - UFL */
	FPE_FLTINV,	/* 11 - INV | UFL */
	FPE_FLTUND,	/* 12 - DNML | UFL */
	FPE_FLTINV,	/* 13 - INV | DNML | UFL */
	FPE_FLTDIV,	/* 14 - DZ | UFL */
	FPE_FLTINV,	/* 15 - INV | DZ | UFL */
	FPE_FLTDIV,	/* 16 - DNML | DZ | UFL */
	FPE_FLTINV,	/* 17 - INV | DNML | DZ | UFL */
	FPE_FLTOVF,	/* 18 - OFL | UFL */
	FPE_FLTINV,	/* 19 - INV | OFL | UFL */
	FPE_FLTUND,	/* 1A - DNML | OFL | UFL */
	FPE_FLTINV,	/* 1B - INV | DNML | OFL | UFL */
	FPE_FLTDIV,	/* 1C - DZ | OFL | UFL */
	FPE_FLTINV,	/* 1D - INV | DZ | OFL | UFL */
	FPE_FLTDIV,	/* 1E - DNML | DZ | OFL | UFL */
	FPE_FLTINV,	/* 1F - INV | DNML | DZ | OFL | UFL */
	FPE_FLTRES,	/* 20 - IMP */
	FPE_FLTINV,	/* 21 - INV | IMP */
	FPE_FLTUND,	/* 22 - DNML | IMP */
	FPE_FLTINV,	/* 23 - INV | DNML | IMP */
	FPE_FLTDIV,	/* 24 - DZ | IMP */
	FPE_FLTINV,	/* 25 - INV | DZ | IMP */
	FPE_FLTDIV,	/* 26 - DNML | DZ | IMP */
	FPE_FLTINV,	/* 27 - INV | DNML | DZ | IMP */
	FPE_FLTOVF,	/* 28 - OFL | IMP */
	FPE_FLTINV,	/* 29 - INV | OFL | IMP */
	FPE_FLTUND,	/* 2A - DNML | OFL | IMP */
	FPE_FLTINV,	/* 2B - INV | DNML | OFL | IMP */
	FPE_FLTDIV,	/* 2C - DZ | OFL | IMP */
	FPE_FLTINV,	/* 2D - INV | DZ | OFL | IMP */
	FPE_FLTDIV,	/* 2E - DNML | DZ | OFL | IMP */
	FPE_FLTINV,	/* 2F - INV | DNML | DZ | OFL | IMP */
	FPE_FLTUND,	/* 30 - UFL | IMP */
	FPE_FLTINV,	/* 31 - INV | UFL | IMP */
	FPE_FLTUND,	/* 32 - DNML | UFL | IMP */
	FPE_FLTINV,	/* 33 - INV | DNML | UFL | IMP */
	FPE_FLTDIV,	/* 34 - DZ | UFL | IMP */
	FPE_FLTINV,	/* 35 - INV | DZ | UFL | IMP */
	FPE_FLTDIV,	/* 36 - DNML | DZ | UFL | IMP */
	FPE_FLTINV,	/* 37 - INV | DNML | DZ | UFL | IMP */
	FPE_FLTOVF,	/* 38 - OFL | UFL | IMP */
	FPE_FLTINV,	/* 39 - INV | OFL | UFL | IMP */
	FPE_FLTUND,	/* 3A - DNML | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3B - INV | DNML | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3C - DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3D - INV | DZ | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3E - DNML | DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3F - INV | DNML | DZ | OFL | UFL | IMP */
	FPE_FLTSUB,	/* 40 - STK */
	FPE_FLTSUB,	/* 41 - INV | STK */
	FPE_FLTUND,	/* 42 - DNML | STK */
	FPE_FLTSUB,	/* 43 - INV | DNML | STK */
	FPE_FLTDIV,	/* 44 - DZ | STK */
	FPE_FLTSUB,	/* 45 - INV | DZ | STK */
	FPE_FLTDIV,	/* 46 - DNML | DZ | STK */
	FPE_FLTSUB,	/* 47 - INV | DNML | DZ | STK */
	FPE_FLTOVF,	/* 48 - OFL | STK */
	FPE_FLTSUB,	/* 49 - INV | OFL | STK */
	FPE_FLTUND,	/* 4A - DNML | OFL | STK */
	FPE_FLTSUB,	/* 4B - INV | DNML | OFL | STK */
	FPE_FLTDIV,	/* 4C - DZ | OFL | STK */
	FPE_FLTSUB,	/* 4D - INV | DZ | OFL | STK */
	FPE_FLTDIV,	/* 4E - DNML | DZ | OFL | STK */
	FPE_FLTSUB,	/* 4F - INV | DNML | DZ | OFL | STK */
	FPE_FLTUND,	/* 50 - UFL | STK */
	FPE_FLTSUB,	/* 51 - INV | UFL | STK */
	FPE_FLTUND,	/* 52 - DNML | UFL | STK */
	FPE_FLTSUB,	/* 53 - INV | DNML | UFL | STK */
	FPE_FLTDIV,	/* 54 - DZ | UFL | STK */
	FPE_FLTSUB,	/* 55 - INV | DZ | UFL | STK */
	FPE_FLTDIV,	/* 56 - DNML | DZ | UFL | STK */
	FPE_FLTSUB,	/* 57 - INV | DNML | DZ | UFL | STK */
	FPE_FLTOVF,	/* 58 - OFL | UFL | STK */
	FPE_FLTSUB,	/* 59 - INV | OFL | UFL | STK */
	FPE_FLTUND,	/* 5A - DNML | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5B - INV | DNML | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5C - DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5D - INV | DZ | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5E - DNML | DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5F - INV | DNML | DZ | OFL | UFL | STK */
	FPE_FLTRES,	/* 60 - IMP | STK */
	FPE_FLTSUB,	/* 61 - INV | IMP | STK */
	FPE_FLTUND,	/* 62 - DNML | IMP | STK */
	FPE_FLTSUB,	/* 63 - INV | DNML | IMP | STK */
	FPE_FLTDIV,	/* 64 - DZ | IMP | STK */
	FPE_FLTSUB,	/* 65 - INV | DZ | IMP | STK */
	FPE_FLTDIV,	/* 66 - DNML | DZ | IMP | STK */
	FPE_FLTSUB,	/* 67 - INV | DNML | DZ | IMP | STK */
	FPE_FLTOVF,	/* 68 - OFL | IMP | STK */
	FPE_FLTSUB,	/* 69 - INV | OFL | IMP | STK */
	FPE_FLTUND,	/* 6A - DNML | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6B - INV | DNML | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6C - DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6D - INV | DZ | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6E - DNML | DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6F - INV | DNML | DZ | OFL | IMP | STK */
	FPE_FLTUND,	/* 70 - UFL | IMP | STK */
	FPE_FLTSUB,	/* 71 - INV | UFL | IMP | STK */
	FPE_FLTUND,	/* 72 - DNML | UFL | IMP | STK */
	FPE_FLTSUB,	/* 73 - INV | DNML | UFL | IMP | STK */
	FPE_FLTDIV,	/* 74 - DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 75 - INV | DZ | UFL | IMP | STK */
	FPE_FLTDIV,	/* 76 - DNML | DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 77 - INV | DNML | DZ | UFL | IMP | STK */
	FPE_FLTOVF,	/* 78 - OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 79 - INV | OFL | UFL | IMP | STK */
	FPE_FLTUND,	/* 7A - DNML | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7B - INV | DNML | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7C - DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7D - INV | DZ | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7E - DNML | DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
};

/*
 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE.
 *
 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs.  We now
 * depend on longjmp() restoring a usable state.  Restoring the state
 * or examining it might fail if we didn't clear exceptions.
 *
 * The error code chosen will be one of the FPE_... macros.  It will be
 * sent as the second argument to old BSD-style signal handlers and as
 * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers.
 *
 * XXX the FP state is not preserved across signal handlers.  So signal
 * handlers cannot afford to do FP unless they preserve the state or
 * longjmp() out.  Both preserving the state and longjmp()ing may be
 * destroyed by IRQ13 bugs.  Clearing FP exceptions is not an acceptable
 * solution for signals other than SIGFPE.
 */
int
npxtrap()
{
	u_short control, status;

	if (!hw_float) {
		printf(
	"npxtrap: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		       PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();

	/*
	 * Interrupt handling (for another interrupt) may have pushed the
	 * state to memory.  Fetch the relevant parts of the state from
	 * wherever they are.
	 */
	if (PCPU_GET(fpcurthread) != curthread) {
		control = GET_FPU_CW(curthread);
		status = GET_FPU_SW(curthread);
	} else {
		fnstcw(&control);
		fnstsw(&status);
	}

	/* Only clear exceptions on live hardware state we own. */
	if (PCPU_GET(fpcurthread) == curthread)
		fnclex();
	critical_exit();
	/* Mask off maskable-but-masked bits, keep stack fault (0x40). */
	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}

/*
 * Implement device not available (DNA) exception
 *
 * It would be better to switch FP context here (if curthread != fpcurthread)
 * and not necessarily for every context switch, but it is too hard to
 * access foreign pcb's.
 */

static int err_count = 0;

int
npxdna(void)
{
	struct pcb *pcb;

	if (!hw_float)
		return (0);
	critical_enter();
	if (PCPU_GET(fpcurthread) == curthread) {
		printf("npxdna: fpcurthread == curthread %d times\n",
		    ++err_count);
		stop_emulating();
		critical_exit();
		return (1);
	}
	if (PCPU_GET(fpcurthread) != NULL) {
		printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
		       PCPU_GET(fpcurthread),
		       PCPU_GET(fpcurthread)->td_proc->p_pid,
		       curthread, curthread->td_proc->p_pid);
		panic("npxdna");
	}
	stop_emulating();
	/*
	 * Record new context early in case frstor causes an IRQ13.
	 */
	PCPU_SET(fpcurthread, curthread);
	pcb = PCPU_GET(curpcb);

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fpu_clean_state();
#endif

	if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/*
		 * This is the first time this thread has used the FPU or
		 * the PCB doesn't contain a clean FPU state.  Explicitly
		 * load an initial state.
		 */
		fpurstor(&npx_initialstate);
		if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
			fldcw(pcb->pcb_initial_npxcw);
		pcb->pcb_flags |= PCB_NPXINITDONE;
		if (PCB_USER_FPU(pcb))
			pcb->pcb_flags |= PCB_NPXUSERINITDONE;
	} else {
		/*
		 * The following fpurstor() may cause an IRQ13 when the
		 * state being restored has a pending error.  The error will
		 * appear to have been triggered by the current (npx) user
		 * instruction even when that instruction is a no-wait
		 * instruction that should not trigger an error (e.g.,
		 * fnclex).  On at least one 486 system all of the no-wait
		 * instructions are broken the same as frstor, so our
		 * treatment does not amplify the breakage.  On at least
		 * one 386/Cyrix 387 system, fnclex works correctly while
		 * frstor and fnsave are broken, so our treatment breaks
		 * fnclex if it is the first FPU instruction after a context
		 * switch.
		 */
		fpurstor(pcb->pcb_save);
	}
	critical_exit();

	return (1);
}

/*
 * Wrapper for fnsave instruction, partly to handle hardware bugs.  When npx
 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
 * no-wait npx instructions.  See the Intel application note AP-578 for
 * details.  This doesn't cause any additional complications here.  IRQ13's
 * are inherently asynchronous unless the CPU is frozen to deliver them --
 * one that started in userland may be delivered many instructions later,
 * after the process has entered the kernel.  It may even be delivered after
 * the fnsave here completes.  A spurious IRQ13 for the fnsave is handled in
 * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
 * it is normally ignored at first because we set fpcurthread to NULL; it is
 * normally retriggered in npxdna() after return to user mode.
 *
 * npxsave() must be called with interrupts disabled, so that it clears
 * fpcurthread atomically with saving the state.  We require callers to do the
 * disabling, since most callers need to disable interrupts anyway to call
 * npxsave() atomically with checking fpcurthread.
 *
 * A previous version of npxsave() went to great lengths to execute fnsave
 * with interrupts enabled in case executing it froze the CPU.  This case
 * can't happen, at least for Intel CPU/NPX's.  Spurious IRQ13's don't imply
 * spurious freezes.
 */
void
npxsave(addr)
	union savefpu *addr;
{

	stop_emulating();
	fpusave(addr);

	start_emulating();
	PCPU_SET(fpcurthread, NULL);
}

/*
 * Release FPU ownership of the current thread without saving its state.
 */
void
npxdrop()
{
	struct thread *td;

	/*
	 * Discard pending exceptions in the !cpu_fxsr case so that unmasked
	 * ones don't cause a panic on the next frstor.
	 */
#ifdef CPU_ENABLE_SSE
	if (!cpu_fxsr)
#endif
		fnclex();

	td = PCPU_GET(fpcurthread);
	KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
	CRITICAL_ASSERT(td);
	PCPU_SET(fpcurthread, NULL);
	td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
	start_emulating();
}

/*
 * Get the user state of the FPU into pcb->pcb_user_save without
 * dropping ownership (if possible).  It returns the FPU ownership
 * status.
 */
int
npxgetregs(struct thread *td)
{
	struct pcb *pcb;

	if (!hw_float)
		return (_MC_FPOWNED_NONE);

	pcb = td->td_pcb;
	if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/* Thread never used the FPU: hand back the pristine state. */
		bcopy(&npx_initialstate, &pcb->pcb_user_save,
		    sizeof(npx_initialstate));
		SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw);
		npxuserinited(td);
		return (_MC_FPOWNED_PCB);
	}
	critical_enter();
	if (td == PCPU_GET(fpcurthread)) {
		fpusave(&pcb->pcb_user_save);
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			/*
			 * fnsave initializes the FPU and destroys whatever
			 * context it contains.  Make sure the FPU owner
			 * starts with a clean state next time.
			 */
			npxdrop();
		critical_exit();
		return (_MC_FPOWNED_FPU);
	} else {
		critical_exit();
		return (_MC_FPOWNED_PCB);
	}
}

/*
 * Mark the thread's user FPU state as initialized (see PCB_NPX*INITDONE).
 */
void
npxuserinited(struct thread *td)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	if (PCB_USER_FPU(pcb))
		pcb->pcb_flags |= PCB_NPXINITDONE;
	pcb->pcb_flags |= PCB_NPXUSERINITDONE;
}


/*
 * Install a new user FPU state for the thread, either directly into the
 * hardware (if the thread owns the FPU) or into its pcb save area.
 */
void
npxsetregs(struct thread *td, union savefpu *addr)
{
	struct pcb *pcb;

	if (!hw_float)
		return;

	pcb = td->td_pcb;
	critical_enter();
	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			fnclex();	/* As in npxdrop(). */
		/* fxrstor needs 16-byte alignment; bounce via the pcb copy. */
		if (((uintptr_t)addr & 0xf) != 0) {
			bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
			fpurstor(&pcb->pcb_user_save);
		} else
			fpurstor(addr);
		critical_exit();
		pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
	} else {
		critical_exit();
		bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
		npxuserinited(td);
	}
}

/*
 * Save the FPU state with whichever instruction this CPU supports.
 */
static void
fpusave(addr)
	union savefpu *addr;
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fxsave(addr);
	else
#endif
		fnsave(addr);
}

#ifdef CPU_ENABLE_SSE
/*
 * On AuthenticAMD processors, the fxrstor instruction does not restore
 * the x87's stored last instruction pointer, last data pointer, and last
 * opcode values, except in the rare case in which the exception summary
 * (ES) bit in the x87 status word is set to 1.
 *
 * In order to avoid leaking this information across processes, we clean
 * these values by performing a dummy load before executing fxrstor().
 */
static void
fpu_clean_state(void)
{
	static float dummy_variable = 0.0;
	u_short status;

	/*
	 * Clear the ES bit in the x87 status word if it is currently
	 * set, in order to avoid causing a fault in the upcoming load.
	 */
	fnstsw(&status);
	if (status & 0x80)
		fnclex();

	/*
	 * Load the dummy variable into the x87 stack.  This mangles
	 * the x87 stack, but we don't care since we're about to call
	 * fxrstor() anyway.
	 */
	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}
#endif /* CPU_ENABLE_SSE */

/*
 * Restore the FPU state with whichever instruction this CPU supports.
 */
static void
fpurstor(addr)
	union savefpu *addr;
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fxrstor(addr);
	else
#endif
		frstor(addr);
}

static device_method_t npx_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	npx_identify),
	DEVMETHOD(device_probe,		npx_probe),
	DEVMETHOD(device_attach,	npx_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	{ 0, 0 }
};

static driver_t npx_driver = {
	"npx",
	npx_methods,
	1,			/* no softc */
};

static devclass_t npx_devclass;

/*
 * We prefer to attach to the root nexus so that the usual case (exception 16)
 * doesn't describe the processor as being `on isa'.
 */
DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);

#ifdef DEV_ISA
/*
 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
 */
static struct isa_pnp_id npxisa_ids[] = {
	{ 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
	{ 0 }
};

/*
 * Claim (and quiet) the legacy PNP coprocessor node so nothing else does.
 */
static int
npxisa_probe(device_t dev)
{
	int result;
	if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) {
		device_quiet(dev);
	}
	return(result);
}

static int
npxisa_attach(device_t dev)
{
	/* Nothing to do; the real work happens in the nexus-attached npx. */
	return (0);
}

static device_method_t npxisa_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		npxisa_probe),
	DEVMETHOD(device_attach,	npxisa_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	{ 0, 0 }
};

static driver_t npxisa_driver = {
	"npxisa",
	npxisa_methods,
	1,			/* no softc */
};

static devclass_t npxisa_devclass;

DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
#ifndef PC98
DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
#endif
#endif /* DEV_ISA */

static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for FPU state");

#define	XSAVE_AREA_ALIGN	64

#define	FPU_KERN_CTX_NPXINITDONE 0x01

/*
 * Context used to let kernel code borrow the FPU: remembers the previous
 * pcb_save pointer and holds an aligned hardware save area (hwstate1).
 */
struct fpu_kern_ctx {
	union savefpu *prev;	/* pcb_save to restore in fpu_kern_leave() */
	uint32_t flags;		/* FPU_KERN_CTX_* */
	char hwstate1[];	/* storage for the aligned save area */
};

/*
 * Allocate a kernel-FPU context; extra space covers the alignment slack.
 */
struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	struct fpu_kern_ctx *res;
	size_t sz;

	sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
	    sizeof(union savefpu);
	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
	    M_NOWAIT : M_WAITOK) | M_ZERO);
	return (res);
}

void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	/* XXXKIB clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}

/*
 * Return the XSAVE_AREA_ALIGN-aligned save area inside the context.
 */
static union savefpu *
fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
{
	vm_offset_t p;

	p = (vm_offset_t)&ctx->hwstate1;
	p = roundup2(p, XSAVE_AREA_ALIGN);
	return ((union savefpu *)p);
}

/*
 * Begin a kernel FPU section: save the thread's current FPU disposition
 * into ctx and redirect pcb_save at the context's private area.
 */
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
	    ("mangled pcb_save"));
	ctx->flags = 0;
	if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
		ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
	/* Flush any live hardware state before stealing the FPU. */
	npxexit(td);
	ctx->prev = pcb->pcb_save;
	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
	pcb->pcb_flags |= PCB_KERNNPX;
	pcb->pcb_flags &= ~PCB_NPXINITDONE;
	return (0);
}

/*
 * End a kernel FPU section started by fpu_kern_enter(): drop hardware
 * ownership and restore the saved pcb_save pointer and INITDONE flags.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	critical_enter();
	if (curthread == PCPU_GET(fpcurthread))
		npxdrop();
	critical_exit();
	pcb->pcb_save = ctx->prev;
	if (pcb->pcb_save == &pcb->pcb_user_save) {
		/* Back to the user state: INITDONE mirrors USERINITDONE. */
		if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		pcb->pcb_flags &= ~PCB_KERNNPX;
	} else {
		/* Still inside an outer kernel-FPU context. */
		if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
	}
	return (0);
}

/*
 * Permanently dedicate the calling kernel thread's FPU context to kernel
 * use (no enter/leave bracketing needed afterwards).
 */
int
fpu_kern_thread(u_int flags)
{
	struct pcb *pcb;

	pcb = PCPU_GET(curpcb);
	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save"));
	KASSERT(PCB_USER_FPU(pcb), ("recursive call"));

	pcb->pcb_flags |= PCB_KERNNPX;
	return (0);
}

1090int 1091is_fpu_kern_thread(u_int flags) 1092{ 1093 1094 if ((curthread->td_pflags & TDP_KTHREAD) == 0) 1095 return (0); 1096 return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNNPX) != 0); 1097} 1098