npx.c revision 215823
1220422Sgabor/*- 2210389Sgabor * Copyright (c) 1990 William Jolitz. 3210389Sgabor * Copyright (c) 1991 The Regents of the University of California. 4210389Sgabor * All rights reserved. 5210389Sgabor * 6211496Sdes * Redistribution and use in source and binary forms, with or without 7210389Sgabor * modification, are permitted provided that the following conditions 8210389Sgabor * are met: 9210389Sgabor * 1. Redistributions of source code must retain the above copyright 10210389Sgabor * notice, this list of conditions and the following disclaimer. 11210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 12210389Sgabor * notice, this list of conditions and the following disclaimer in the 13210389Sgabor * documentation and/or other materials provided with the distribution. 14210389Sgabor * 4. Neither the name of the University nor the names of its contributors 15210389Sgabor * may be used to endorse or promote products derived from this software 16210389Sgabor * without specific prior written permission. 17210389Sgabor * 18210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28210389Sgabor * SUCH DAMAGE. 
 *
 *	from: @(#)npx.c	7.2 (Berkeley) 5/12/91
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/isa/npx.c 215823 2010-11-25 18:14:18Z dim $");

#include "opt_cpu.h"
#include "opt_isa.h"
#include "opt_npx.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <machine/bus.h>
#include <sys/rman.h>
#ifdef NPX_DEBUG
#include <sys/syslog.h>
#endif
#include <sys/signalvar.h>

#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/psl.h>
#include <machine/resource.h>
#include <machine/specialreg.h>
#include <machine/segments.h>
#include <machine/ucontext.h>

#include <machine/intr_machdep.h>
#ifdef XEN
#include <machine/xen/xen-os.h>
#include <xen/hypervisor.h>
#endif

#ifdef DEV_ISA
#include <isa/isavar.h>
#endif

/*
 * SSE support is compiled in only for I686_CPU kernels that did not ask
 * for it to be disabled with CPU_DISABLE_SSE.
 */
#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define	CPU_ENABLE_SSE
#endif

/*
 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
 */

#if defined(__GNUCLIKE_ASM) && !defined(lint)

/*
 * One-instruction inline assembly wrappers.  Every macro that takes an
 * "addr" argument dereferences it itself, so callers pass a pointer to the
 * object that the instruction reads ("m" input) or writes ("=m" output).
 */

/* Load the x87 control word from "cw". */
#define	fldcw(cw)		__asm __volatile("fldcw %0" : : "m" (cw))
/* Clear pending x87 exceptions (no-wait form). */
#define	fnclex()		__asm __volatile("fnclex")
/* Initialize the x87 unit (no-wait form). */
#define	fninit()		__asm __volatile("fninit")
/* Store the x87 state to *addr with fnsave. */
#define	fnsave(addr)		__asm __volatile("fnsave %0" : "=m" (*(addr)))
/* Store the x87 control word to *addr. */
#define	fnstcw(addr)		__asm __volatile("fnstcw %0" : "=m" (*(addr)))
/* Store the x87 status word to *addr ("=am": %ax or memory). */
#define	fnstsw(addr)		__asm __volatile("fnstsw %0" : "=am" (*(addr)))
/* Execute fldz; fld1; fdiv; fnop -- used by npx_probe() to test trapping. */
#define	fp_divide_by_0()	__asm __volatile( \
				    "fldz; fld1; fdiv %st,%st(1); fnop")
/* Load the x87 state from *addr with frstor. */
#define	frstor(addr)		__asm __volatile("frstor %0" : : "m" (*(addr)))
#ifdef CPU_ENABLE_SSE
/* fxrstor/fxsave counterparts of frstor/fnsave. */
#define	fxrstor(addr)		__asm __volatile("fxrstor %0" : : "m" (*(addr)))
#define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
#endif
#ifdef XEN
/* Under XEN both operations are delegated to HYPERVISOR_fpu_taskswitch(). */
#define	start_emulating()	(HYPERVISOR_fpu_taskswitch(1))
#define	stop_emulating()	(HYPERVISOR_fpu_taskswitch(0))
#else
/* Set CR0_TS in the machine status word; clobbers %ax. */
#define	start_emulating()	__asm __volatile( \
				    "smsw %%ax; orb %0,%%al; lmsw %%ax" \
				    : : "n" (CR0_TS) : "ax")
/* Execute clts. */
#define	stop_emulating()	__asm __volatile("clts")
#endif
#else	/* !(__GNUCLIKE_ASM && !lint) */

/*
 * Without GNU-style inline assembly (or under lint) the same operations are
 * only declared here as external functions.
 */
void	fldcw(u_short cw);
void	fnclex(void);
void	fninit(void);
void	fnsave(caddr_t addr);
void	fnstcw(caddr_t addr);
void	fnstsw(caddr_t addr);
void	fp_divide_by_0(void);
void	frstor(caddr_t addr);
#ifdef CPU_ENABLE_SSE
void	fxsave(caddr_t addr);
void	fxrstor(caddr_t addr);
#endif
void	start_emulating(void);
127210389Sgaborvoid stop_emulating(void); 128226261Sgabor 129210389Sgabor#endif /* __GNUCLIKE_ASM && !lint */ 130210389Sgabor 131210389Sgabor#ifdef CPU_ENABLE_SSE 132210389Sgabor#define GET_FPU_CW(thread) \ 133210389Sgabor (cpu_fxsr ? \ 134210389Sgabor (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \ 135210578Sgabor (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw) 136210389Sgabor#define GET_FPU_SW(thread) \ 137210389Sgabor (cpu_fxsr ? \ 138210389Sgabor (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \ 139210389Sgabor (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw) 140210389Sgabor#define SET_FPU_CW(savefpu, value) do { \ 141210578Sgabor if (cpu_fxsr) \ 142210389Sgabor (savefpu)->sv_xmm.sv_env.en_cw = (value); \ 143210389Sgabor else \ 144210389Sgabor (savefpu)->sv_87.sv_env.en_cw = (value); \ 145210389Sgabor} while (0) 146210389Sgabor#else /* CPU_ENABLE_SSE */ 147210389Sgabor#define GET_FPU_CW(thread) \ 148210389Sgabor (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw) 149210389Sgabor#define GET_FPU_SW(thread) \ 150210389Sgabor (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw) 151210389Sgabor#define SET_FPU_CW(savefpu, value) \ 152210389Sgabor (savefpu)->sv_87.sv_env.en_cw = (value) 153#endif /* CPU_ENABLE_SSE */ 154 155typedef u_char bool_t; 156 157#ifdef CPU_ENABLE_SSE 158static void fpu_clean_state(void); 159#endif 160 161static void fpusave(union savefpu *); 162static void fpurstor(union savefpu *); 163static int npx_attach(device_t dev); 164static void npx_identify(driver_t *driver, device_t parent); 165static int npx_probe(device_t dev); 166 167int hw_float; 168 169SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, 170 &hw_float, 0, "Floating point instructions executed in hardware"); 171 172static volatile u_int npx_traps_while_probing; 173static union savefpu npx_initialstate; 174 175alias_for_inthand_t probetrap; 176__asm(" \n\ 177 .text \n\ 178 .p2align 2,0x90 \n\ 179 .type " __XSTRING(CNAME(probetrap)) ",@function \n\ 180" __XSTRING(CNAME(probetrap)) 
": \n\ 181 ss \n\ 182 incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\ 183 fnclex \n\ 184 iret \n\ 185"); 186 187/* 188 * Identify routine. Create a connection point on our parent for probing. 189 */ 190static void 191npx_identify(driver, parent) 192 driver_t *driver; 193 device_t parent; 194{ 195 device_t child; 196 197 child = BUS_ADD_CHILD(parent, 0, "npx", 0); 198 if (child == NULL) 199 panic("npx_identify"); 200} 201 202/* 203 * Probe routine. Set flags to tell npxattach() what to do. Set up an 204 * interrupt handler if npx needs to use interrupts. 205 */ 206static int 207npx_probe(device_t dev) 208{ 209 struct gate_descriptor save_idt_npxtrap; 210 u_short control, status; 211 212 device_set_desc(dev, "math processor"); 213 214 /* 215 * Modern CPUs all have an FPU that uses the INT16 interface 216 * and provide a simple way to verify that, so handle the 217 * common case right away. 218 */ 219 if (cpu_feature & CPUID_FPU) { 220 hw_float = 1; 221 device_quiet(dev); 222 return (0); 223 } 224 225 save_idt_npxtrap = idt[IDT_MF]; 226 setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL, 227 GSEL(GCODE_SEL, SEL_KPL)); 228 229 /* 230 * Don't trap while we're probing. 231 */ 232 stop_emulating(); 233 234 /* 235 * Finish resetting the coprocessor, if any. If there is an error 236 * pending, then we may get a bogus IRQ13, but npx_intr() will handle 237 * it OK. Bogus halts have never been observed, but we enabled 238 * IRQ13 and cleared the BUSY# latch early to handle them anyway. 239 */ 240 fninit(); 241 242 /* 243 * Don't use fwait here because it might hang. 244 * Don't use fnop here because it usually hangs if there is no FPU. 245 */ 246 DELAY(1000); /* wait for any IRQ13 */ 247#ifdef DIAGNOSTIC 248 if (npx_traps_while_probing != 0) 249 printf("fninit caused %u bogus npx trap(s)\n", 250 npx_traps_while_probing); 251#endif 252 /* 253 * Check for a status of mostly zero. 
254 */ 255 status = 0x5a5a; 256 fnstsw(&status); 257 if ((status & 0xb8ff) == 0) { 258 /* 259 * Good, now check for a proper control word. 260 */ 261 control = 0x5a5a; 262 fnstcw(&control); 263 if ((control & 0x1f3f) == 0x033f) { 264 /* 265 * We have an npx, now divide by 0 to see if exception 266 * 16 works. 267 */ 268 control &= ~(1 << 2); /* enable divide by 0 trap */ 269 fldcw(control); 270#ifdef FPU_ERROR_BROKEN 271 /* 272 * FPU error signal doesn't work on some CPU 273 * accelerator board. 274 */ 275 hw_float = 1; 276 return (0); 277#endif 278 npx_traps_while_probing = 0; 279 fp_divide_by_0(); 280 if (npx_traps_while_probing != 0) { 281 /* 282 * Good, exception 16 works. 283 */ 284 hw_float = 1; 285 goto cleanup; 286 } 287 device_printf(dev, 288 "FPU does not use exception 16 for error reporting\n"); 289 goto cleanup; 290 } 291 } 292 293 /* 294 * Probe failed. Floating point simply won't work. 295 * Notify user and disable FPU/MMX/SSE instruction execution. 296 */ 297 device_printf(dev, "WARNING: no FPU!\n"); 298 __asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : : 299 "n" (CR0_EM | CR0_MP) : "ax"); 300 301cleanup: 302 idt[IDT_MF] = save_idt_npxtrap; 303 return (hw_float ? 0 : ENXIO); 304} 305 306/* 307 * Attach routine - announce which it is, and wire into system 308 */ 309static int 310npx_attach(device_t dev) 311{ 312 313 npxinit(); 314 critical_enter(); 315 stop_emulating(); 316 fpusave(&npx_initialstate); 317 start_emulating(); 318#ifdef CPU_ENABLE_SSE 319 if (cpu_fxsr) { 320 if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask) 321 cpu_mxcsr_mask = 322 npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask; 323 else 324 cpu_mxcsr_mask = 0xFFBF; 325 bzero(npx_initialstate.sv_xmm.sv_fp, 326 sizeof(npx_initialstate.sv_xmm.sv_fp)); 327 bzero(npx_initialstate.sv_xmm.sv_xmm, 328 sizeof(npx_initialstate.sv_xmm.sv_xmm)); 329 /* XXX might need even more zeroing. 
*/ 330 } else 331#endif 332 bzero(npx_initialstate.sv_87.sv_ac, 333 sizeof(npx_initialstate.sv_87.sv_ac)); 334 critical_exit(); 335 336 return (0); 337} 338 339/* 340 * Initialize floating point unit. 341 */ 342void 343npxinit(void) 344{ 345 static union savefpu dummy; 346 register_t saveintr; 347 u_short control; 348 349 if (!hw_float) 350 return; 351 /* 352 * fninit has the same h/w bugs as fnsave. Use the detoxified 353 * fnsave to throw away any junk in the fpu. npxsave() initializes 354 * the fpu and sets fpcurthread = NULL as important side effects. 355 * 356 * It is too early for critical_enter() to work on AP. 357 */ 358 saveintr = intr_disable(); 359 npxsave(&dummy); 360 stop_emulating(); 361#ifdef CPU_ENABLE_SSE 362 /* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */ 363 if (cpu_fxsr) 364 fninit(); 365#endif 366 control = __INITIAL_NPXCW__; 367 fldcw(control); 368 start_emulating(); 369 intr_restore(saveintr); 370} 371 372/* 373 * Free coprocessor (if we have it). 374 */ 375void 376npxexit(td) 377 struct thread *td; 378{ 379 380 critical_enter(); 381 if (curthread == PCPU_GET(fpcurthread)) 382 npxsave(PCPU_GET(curpcb)->pcb_save); 383 critical_exit(); 384#ifdef NPX_DEBUG 385 if (hw_float) { 386 u_int masked_exceptions; 387 388 masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f; 389 /* 390 * Log exceptions that would have trapped with the old 391 * control word (overflow, divide by 0, and invalid operand). 392 */ 393 if (masked_exceptions & 0x0d) 394 log(LOG_ERR, 395 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n", 396 td->td_proc->p_pid, td->td_proc->p_comm, 397 masked_exceptions); 398 } 399#endif 400} 401 402int 403npxformat() 404{ 405 406 if (!hw_float) 407 return (_MC_FPFMT_NODEV); 408#ifdef CPU_ENABLE_SSE 409 if (cpu_fxsr) 410 return (_MC_FPFMT_XMM); 411#endif 412 return (_MC_FPFMT_387); 413} 414 415/* 416 * The following mechanism is used to ensure that the FPE_... 
 * value that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if the user process modifies the control
 * word while a status word bit is already set.  While this is a sign
 * of bad coding, we have no choice but to narrow them down to one
 * bit, since we must not send a trapcode that is not exactly one of
 * the FPE_ macros.
 *
 * The mechanism has a static table with 127 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The macro to choose one of these values does these steps: 1) Throw
 * away status word bits that cannot be masked.  2) Throw away the bits
 * currently masked in the control word, assuming the user isn't
 * interested in them anymore.  3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 * 1  Invalid operation (FP_X_INV)
 *   1a   Stack underflow
 *   1b   Stack overflow
 *   1c   Operand of unsupported format
 *   1d   SNaN operand.
448 * 2 QNaN operand (not an exception, irrelavant here) 449 * 3 Any other invalid-operation not mentioned above or zero divide 450 * (FP_X_INV, FP_X_DZ) 451 * 4 Denormal operand (FP_X_DNML) 452 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 453 * 6 Inexact result (FP_X_IMP) 454 */ 455static char fpetable[128] = { 456 0, 457 FPE_FLTINV, /* 1 - INV */ 458 FPE_FLTUND, /* 2 - DNML */ 459 FPE_FLTINV, /* 3 - INV | DNML */ 460 FPE_FLTDIV, /* 4 - DZ */ 461 FPE_FLTINV, /* 5 - INV | DZ */ 462 FPE_FLTDIV, /* 6 - DNML | DZ */ 463 FPE_FLTINV, /* 7 - INV | DNML | DZ */ 464 FPE_FLTOVF, /* 8 - OFL */ 465 FPE_FLTINV, /* 9 - INV | OFL */ 466 FPE_FLTUND, /* A - DNML | OFL */ 467 FPE_FLTINV, /* B - INV | DNML | OFL */ 468 FPE_FLTDIV, /* C - DZ | OFL */ 469 FPE_FLTINV, /* D - INV | DZ | OFL */ 470 FPE_FLTDIV, /* E - DNML | DZ | OFL */ 471 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 472 FPE_FLTUND, /* 10 - UFL */ 473 FPE_FLTINV, /* 11 - INV | UFL */ 474 FPE_FLTUND, /* 12 - DNML | UFL */ 475 FPE_FLTINV, /* 13 - INV | DNML | UFL */ 476 FPE_FLTDIV, /* 14 - DZ | UFL */ 477 FPE_FLTINV, /* 15 - INV | DZ | UFL */ 478 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 479 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 480 FPE_FLTOVF, /* 18 - OFL | UFL */ 481 FPE_FLTINV, /* 19 - INV | OFL | UFL */ 482 FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 483 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 484 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 485 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 486 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 487 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 488 FPE_FLTRES, /* 20 - IMP */ 489 FPE_FLTINV, /* 21 - INV | IMP */ 490 FPE_FLTUND, /* 22 - DNML | IMP */ 491 FPE_FLTINV, /* 23 - INV | DNML | IMP */ 492 FPE_FLTDIV, /* 24 - DZ | IMP */ 493 FPE_FLTINV, /* 25 - INV | DZ | IMP */ 494 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 495 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 496 FPE_FLTOVF, /* 28 - OFL | IMP */ 497 FPE_FLTINV, /* 29 - INV | OFL | IMP */ 498 FPE_FLTUND, /* 2A - DNML | OFL 
| IMP */ 499 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 500 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 501 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 502 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 503 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 504 FPE_FLTUND, /* 30 - UFL | IMP */ 505 FPE_FLTINV, /* 31 - INV | UFL | IMP */ 506 FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 507 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 508 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 509 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ 510 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 511 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 512 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 513 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 514 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 515 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 516 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 517 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 518 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 519 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 520 FPE_FLTSUB, /* 40 - STK */ 521 FPE_FLTSUB, /* 41 - INV | STK */ 522 FPE_FLTUND, /* 42 - DNML | STK */ 523 FPE_FLTSUB, /* 43 - INV | DNML | STK */ 524 FPE_FLTDIV, /* 44 - DZ | STK */ 525 FPE_FLTSUB, /* 45 - INV | DZ | STK */ 526 FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 527 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 528 FPE_FLTOVF, /* 48 - OFL | STK */ 529 FPE_FLTSUB, /* 49 - INV | OFL | STK */ 530 FPE_FLTUND, /* 4A - DNML | OFL | STK */ 531 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ 532 FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 533 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 534 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 535 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 536 FPE_FLTUND, /* 50 - UFL | STK */ 537 FPE_FLTSUB, /* 51 - INV | UFL | STK */ 538 FPE_FLTUND, /* 52 - DNML | UFL | STK */ 539 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 540 FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 541 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 542 
FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 543 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 544 FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 545 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 546 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 547 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 548 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 549 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 550 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 551 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 552 FPE_FLTRES, /* 60 - IMP | STK */ 553 FPE_FLTSUB, /* 61 - INV | IMP | STK */ 554 FPE_FLTUND, /* 62 - DNML | IMP | STK */ 555 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 556 FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 557 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 558 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 559 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 560 FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 561 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 562 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 563 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 564 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 565 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ 566 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 567 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 568 FPE_FLTUND, /* 70 - UFL | IMP | STK */ 569 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 570 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 571 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 572 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 573 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 574 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 575 FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */ 576 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 577 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 578 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 579 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 580 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 581 
FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 582 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 583 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 584}; 585 586/* 587 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 588 * 589 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now 590 * depend on longjmp() restoring a usable state. Restoring the state 591 * or examining it might fail if we didn't clear exceptions. 592 * 593 * The error code chosen will be one of the FPE_... macros. It will be 594 * sent as the second argument to old BSD-style signal handlers and as 595 * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. 596 * 597 * XXX the FP state is not preserved across signal handlers. So signal 598 * handlers cannot afford to do FP unless they preserve the state or 599 * longjmp() out. Both preserving the state and longjmp()ing may be 600 * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable 601 * solution for signals other than SIGFPE. 602 */ 603int 604npxtrap() 605{ 606 u_short control, status; 607 608 if (!hw_float) { 609 printf("npxtrap: fpcurthread = %p, curthread = %p, hw_float = %d\n", 610 PCPU_GET(fpcurthread), curthread, hw_float); 611 panic("npxtrap from nowhere"); 612 } 613 critical_enter(); 614 615 /* 616 * Interrupt handling (for another interrupt) may have pushed the 617 * state to memory. Fetch the relevant parts of the state from 618 * wherever they are. 
619 */ 620 if (PCPU_GET(fpcurthread) != curthread) { 621 control = GET_FPU_CW(curthread); 622 status = GET_FPU_SW(curthread); 623 } else { 624 fnstcw(&control); 625 fnstsw(&status); 626 } 627 628 if (PCPU_GET(fpcurthread) == curthread) 629 fnclex(); 630 critical_exit(); 631 return (fpetable[status & ((~control & 0x3f) | 0x40)]); 632} 633 634/* 635 * Implement device not available (DNA) exception 636 * 637 * It would be better to switch FP context here (if curthread != fpcurthread) 638 * and not necessarily for every context switch, but it is too hard to 639 * access foreign pcb's. 640 */ 641 642static int err_count = 0; 643 644int 645npxdna(void) 646{ 647 struct pcb *pcb; 648 649 if (!hw_float) 650 return (0); 651 critical_enter(); 652 if (PCPU_GET(fpcurthread) == curthread) { 653 printf("npxdna: fpcurthread == curthread %d times\n", 654 ++err_count); 655 stop_emulating(); 656 critical_exit(); 657 return (1); 658 } 659 if (PCPU_GET(fpcurthread) != NULL) { 660 printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n", 661 PCPU_GET(fpcurthread), 662 PCPU_GET(fpcurthread)->td_proc->p_pid, 663 curthread, curthread->td_proc->p_pid); 664 panic("npxdna"); 665 } 666 stop_emulating(); 667 /* 668 * Record new context early in case frstor causes an IRQ13. 669 */ 670 PCPU_SET(fpcurthread, curthread); 671 pcb = PCPU_GET(curpcb); 672 673#ifdef CPU_ENABLE_SSE 674 if (cpu_fxsr) 675 fpu_clean_state(); 676#endif 677 678 if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { 679 /* 680 * This is the first time this thread has used the FPU or 681 * the PCB doesn't contain a clean FPU state. Explicitly 682 * load an initial state. 683 */ 684 fpurstor(&npx_initialstate); 685 if (pcb->pcb_initial_npxcw != __INITIAL_NPXCW__) 686 fldcw(pcb->pcb_initial_npxcw); 687 pcb->pcb_flags |= PCB_NPXINITDONE; 688 if (PCB_USER_FPU(pcb)) 689 pcb->pcb_flags |= PCB_NPXUSERINITDONE; 690 } else { 691 /* 692 * The following fpurstor() may cause an IRQ13 when the 693 * state being restored has a pending error. 
The error will 694 * appear to have been triggered by the current (npx) user 695 * instruction even when that instruction is a no-wait 696 * instruction that should not trigger an error (e.g., 697 * fnclex). On at least one 486 system all of the no-wait 698 * instructions are broken the same as frstor, so our 699 * treatment does not amplify the breakage. On at least 700 * one 386/Cyrix 387 system, fnclex works correctly while 701 * frstor and fnsave are broken, so our treatment breaks 702 * fnclex if it is the first FPU instruction after a context 703 * switch. 704 */ 705 fpurstor(pcb->pcb_save); 706 } 707 critical_exit(); 708 709 return (1); 710} 711 712/* 713 * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx 714 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by 715 * no-wait npx instructions. See the Intel application note AP-578 for 716 * details. This doesn't cause any additional complications here. IRQ13's 717 * are inherently asynchronous unless the CPU is frozen to deliver them -- 718 * one that started in userland may be delivered many instructions later, 719 * after the process has entered the kernel. It may even be delivered after 720 * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in 721 * the same way as a very-late-arriving non-spurious IRQ13 from user mode: 722 * it is normally ignored at first because we set fpcurthread to NULL; it is 723 * normally retriggered in npxdna() after return to user mode. 724 * 725 * npxsave() must be called with interrupts disabled, so that it clears 726 * fpcurthread atomically with saving the state. We require callers to do the 727 * disabling, since most callers need to disable interrupts anyway to call 728 * npxsave() atomically with checking fpcurthread. 729 * 730 * A previous version of npxsave() went to great lengths to excecute fnsave 731 * with interrupts enabled in case executing it froze the CPU. 
This case 732 * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply 733 * spurious freezes. 734 */ 735void 736npxsave(addr) 737 union savefpu *addr; 738{ 739 740 stop_emulating(); 741 fpusave(addr); 742 743 start_emulating(); 744 PCPU_SET(fpcurthread, NULL); 745} 746 747void 748npxdrop() 749{ 750 struct thread *td; 751 752 /* 753 * Discard pending exceptions in the !cpu_fxsr case so that unmasked 754 * ones don't cause a panic on the next frstor. 755 */ 756#ifdef CPU_ENABLE_SSE 757 if (!cpu_fxsr) 758#endif 759 fnclex(); 760 761 td = PCPU_GET(fpcurthread); 762 KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); 763 CRITICAL_ASSERT(td); 764 PCPU_SET(fpcurthread, NULL); 765 td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; 766 start_emulating(); 767} 768 769/* 770 * Get the state of the FPU without dropping ownership (if possible). 771 * It returns the FPU ownership status. 772 */ 773int 774npxgetregs(struct thread *td, union savefpu *addr) 775{ 776 struct pcb *pcb; 777 778 if (!hw_float) 779 return (_MC_FPOWNED_NONE); 780 781 pcb = td->td_pcb; 782 if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) { 783 bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); 784 SET_FPU_CW(addr, pcb->pcb_initial_npxcw); 785 return (_MC_FPOWNED_NONE); 786 } 787 critical_enter(); 788 if (td == PCPU_GET(fpcurthread)) { 789 fpusave(addr); 790#ifdef CPU_ENABLE_SSE 791 if (!cpu_fxsr) 792#endif 793 /* 794 * fnsave initializes the FPU and destroys whatever 795 * context it contains. Make sure the FPU owner 796 * starts with a clean state next time. 
797 */ 798 npxdrop(); 799 critical_exit(); 800 return (_MC_FPOWNED_FPU); 801 } else { 802 critical_exit(); 803 bcopy(pcb->pcb_save, addr, sizeof(*addr)); 804 return (_MC_FPOWNED_PCB); 805 } 806} 807 808int 809npxgetuserregs(struct thread *td, union savefpu *addr) 810{ 811 struct pcb *pcb; 812 813 if (!hw_float) 814 return (_MC_FPOWNED_NONE); 815 816 pcb = td->td_pcb; 817 if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) == 0) { 818 bcopy(&npx_initialstate, addr, sizeof(npx_initialstate)); 819 SET_FPU_CW(addr, pcb->pcb_initial_npxcw); 820 return (_MC_FPOWNED_NONE); 821 } 822 critical_enter(); 823 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 824 fpusave(addr); 825#ifdef CPU_ENABLE_SSE 826 if (!cpu_fxsr) 827#endif 828 /* 829 * fnsave initializes the FPU and destroys whatever 830 * context it contains. Make sure the FPU owner 831 * starts with a clean state next time. 832 */ 833 npxdrop(); 834 critical_exit(); 835 return (_MC_FPOWNED_FPU); 836 } else { 837 critical_exit(); 838 bcopy(&pcb->pcb_user_save, addr, sizeof(*addr)); 839 return (_MC_FPOWNED_PCB); 840 } 841} 842 843/* 844 * Set the state of the FPU. 845 */ 846void 847npxsetregs(struct thread *td, union savefpu *addr) 848{ 849 struct pcb *pcb; 850 851 if (!hw_float) 852 return; 853 854 pcb = td->td_pcb; 855 critical_enter(); 856 if (td == PCPU_GET(fpcurthread)) { 857#ifdef CPU_ENABLE_SSE 858 if (!cpu_fxsr) 859#endif 860 fnclex(); /* As in npxdrop(). 
*/ 861 fpurstor(addr); 862 critical_exit(); 863 } else { 864 critical_exit(); 865 bcopy(addr, pcb->pcb_save, sizeof(*addr)); 866 } 867 if (PCB_USER_FPU(pcb)) 868 pcb->pcb_flags |= PCB_NPXUSERINITDONE; 869 pcb->pcb_flags |= PCB_NPXINITDONE; 870} 871 872void 873npxsetuserregs(struct thread *td, union savefpu *addr) 874{ 875 struct pcb *pcb; 876 877 if (!hw_float) 878 return; 879 880 pcb = td->td_pcb; 881 critical_enter(); 882 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 883#ifdef CPU_ENABLE_SSE 884 if (!cpu_fxsr) 885#endif 886 fnclex(); /* As in npxdrop(). */ 887 fpurstor(addr); 888 critical_exit(); 889 pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE; 890 } else { 891 critical_exit(); 892 bcopy(addr, &pcb->pcb_user_save, sizeof(*addr)); 893 if (PCB_USER_FPU(pcb)) 894 pcb->pcb_flags |= PCB_NPXINITDONE; 895 pcb->pcb_flags |= PCB_NPXUSERINITDONE; 896 } 897} 898 899static void 900fpusave(addr) 901 union savefpu *addr; 902{ 903 904#ifdef CPU_ENABLE_SSE 905 if (cpu_fxsr) 906 fxsave(addr); 907 else 908#endif 909 fnsave(addr); 910} 911 912#ifdef CPU_ENABLE_SSE 913/* 914 * On AuthenticAMD processors, the fxrstor instruction does not restore 915 * the x87's stored last instruction pointer, last data pointer, and last 916 * opcode values, except in the rare case in which the exception summary 917 * (ES) bit in the x87 status word is set to 1. 918 * 919 * In order to avoid leaking this information across processes, we clean 920 * these values by performing a dummy load before executing fxrstor(). 921 */ 922static void 923fpu_clean_state(void) 924{ 925 static float dummy_variable = 0.0; 926 u_short status; 927 928 /* 929 * Clear the ES bit in the x87 status word if it is currently 930 * set, in order to avoid causing a fault in the upcoming load. 931 */ 932 fnstsw(&status); 933 if (status & 0x80) 934 fnclex(); 935 936 /* 937 * Load the dummy variable into the x87 stack. 
This mangles 938 * the x87 stack, but we don't care since we're about to call 939 * fxrstor() anyway. 940 */ 941 __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); 942} 943#endif /* CPU_ENABLE_SSE */ 944 945static void 946fpurstor(addr) 947 union savefpu *addr; 948{ 949 950#ifdef CPU_ENABLE_SSE 951 if (cpu_fxsr) 952 fxrstor(addr); 953 else 954#endif 955 frstor(addr); 956} 957 958static device_method_t npx_methods[] = { 959 /* Device interface */ 960 DEVMETHOD(device_identify, npx_identify), 961 DEVMETHOD(device_probe, npx_probe), 962 DEVMETHOD(device_attach, npx_attach), 963 DEVMETHOD(device_detach, bus_generic_detach), 964 DEVMETHOD(device_shutdown, bus_generic_shutdown), 965 DEVMETHOD(device_suspend, bus_generic_suspend), 966 DEVMETHOD(device_resume, bus_generic_resume), 967 968 { 0, 0 } 969}; 970 971static driver_t npx_driver = { 972 "npx", 973 npx_methods, 974 1, /* no softc */ 975}; 976 977static devclass_t npx_devclass; 978 979/* 980 * We prefer to attach to the root nexus so that the usual case (exception 16) 981 * doesn't describe the processor as being `on isa'. 982 */ 983DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0); 984 985#ifdef DEV_ISA 986/* 987 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
988 */ 989static struct isa_pnp_id npxisa_ids[] = { 990 { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ 991 { 0 } 992}; 993 994static int 995npxisa_probe(device_t dev) 996{ 997 int result; 998 if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) { 999 device_quiet(dev); 1000 } 1001 return(result); 1002} 1003 1004static int 1005npxisa_attach(device_t dev) 1006{ 1007 return (0); 1008} 1009 1010static device_method_t npxisa_methods[] = { 1011 /* Device interface */ 1012 DEVMETHOD(device_probe, npxisa_probe), 1013 DEVMETHOD(device_attach, npxisa_attach), 1014 DEVMETHOD(device_detach, bus_generic_detach), 1015 DEVMETHOD(device_shutdown, bus_generic_shutdown), 1016 DEVMETHOD(device_suspend, bus_generic_suspend), 1017 DEVMETHOD(device_resume, bus_generic_resume), 1018 1019 { 0, 0 } 1020}; 1021 1022static driver_t npxisa_driver = { 1023 "npxisa", 1024 npxisa_methods, 1025 1, /* no softc */ 1026}; 1027 1028static devclass_t npxisa_devclass; 1029 1030DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0); 1031#ifndef PC98 1032DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0); 1033#endif 1034#endif /* DEV_ISA */ 1035 1036int 1037fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) 1038{ 1039 struct pcb *pcb; 1040 1041 pcb = td->td_pcb; 1042 KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save, 1043 ("mangled pcb_save")); 1044 ctx->flags = 0; 1045 if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0) 1046 ctx->flags |= FPU_KERN_CTX_NPXINITDONE; 1047 npxexit(td); 1048 ctx->prev = pcb->pcb_save; 1049 pcb->pcb_save = &ctx->hwstate; 1050 pcb->pcb_flags |= PCB_KERNNPX; 1051 pcb->pcb_flags &= ~PCB_NPXINITDONE; 1052 return (0); 1053} 1054 1055int 1056fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) 1057{ 1058 struct pcb *pcb; 1059 1060 pcb = td->td_pcb; 1061 critical_enter(); 1062 if (curthread == PCPU_GET(fpcurthread)) 1063 npxdrop(); 1064 critical_exit(); 1065 pcb->pcb_save = 
ctx->prev; 1066 if (pcb->pcb_save == &pcb->pcb_user_save) { 1067 if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0) 1068 pcb->pcb_flags |= PCB_NPXINITDONE; 1069 else 1070 pcb->pcb_flags &= ~PCB_NPXINITDONE; 1071 pcb->pcb_flags &= ~PCB_KERNNPX; 1072 } else { 1073 if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0) 1074 pcb->pcb_flags |= PCB_NPXINITDONE; 1075 else 1076 pcb->pcb_flags &= ~PCB_NPXINITDONE; 1077 KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); 1078 } 1079 return (0); 1080} 1081 1082int 1083fpu_kern_thread(u_int flags) 1084{ 1085 struct pcb *pcb; 1086 1087 pcb = PCPU_GET(curpcb); 1088 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, 1089 ("Only kthread may use fpu_kern_thread")); 1090 KASSERT(pcb->pcb_save == &pcb->pcb_user_save, ("mangled pcb_save")); 1091 KASSERT(PCB_USER_FPU(pcb), ("recursive call")); 1092 1093 pcb->pcb_flags |= PCB_KERNNPX; 1094 return (0); 1095} 1096 1097int 1098is_fpu_kern_thread(u_int flags) 1099{ 1100 1101 if ((curthread->td_pflags & TDP_KTHREAD) == 0) 1102 return (0); 1103 return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNNPX) != 0); 1104} 1105