/* fpu.c revision 230426 */
1/*- 2 * Copyright (c) 1990 William Jolitz. 3 * Copyright (c) 1991 The Regents of the University of California. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 4. Neither the name of the University nor the names of its contributors 15 * may be used to endorse or promote products derived from this software 16 * without specific prior written permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 
29 * 30 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91 31 */ 32 33#include <sys/cdefs.h> 34__FBSDID("$FreeBSD: head/sys/amd64/amd64/fpu.c 230426 2012-01-21 17:45:27Z kib $"); 35 36#include <sys/param.h> 37#include <sys/systm.h> 38#include <sys/bus.h> 39#include <sys/kernel.h> 40#include <sys/lock.h> 41#include <sys/malloc.h> 42#include <sys/module.h> 43#include <sys/mutex.h> 44#include <sys/mutex.h> 45#include <sys/proc.h> 46#include <sys/sysctl.h> 47#include <machine/bus.h> 48#include <sys/rman.h> 49#include <sys/signalvar.h> 50 51#include <machine/cputypes.h> 52#include <machine/frame.h> 53#include <machine/intr_machdep.h> 54#include <machine/md_var.h> 55#include <machine/pcb.h> 56#include <machine/psl.h> 57#include <machine/resource.h> 58#include <machine/specialreg.h> 59#include <machine/segments.h> 60#include <machine/ucontext.h> 61 62/* 63 * Floating point support. 64 */ 65 66#if defined(__GNUCLIKE_ASM) && !defined(lint) 67 68#define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw)) 69#define fnclex() __asm __volatile("fnclex") 70#define fninit() __asm __volatile("fninit") 71#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr))) 72#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr))) 73#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr))) 74#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) 75#define ldmxcsr(csr) __asm __volatile("ldmxcsr %0" : : "m" (csr)) 76#define start_emulating() __asm __volatile( \ 77 "smsw %%ax; orb %0,%%al; lmsw %%ax" \ 78 : : "n" (CR0_TS) : "ax") 79#define stop_emulating() __asm __volatile("clts") 80 81#else /* !(__GNUCLIKE_ASM && !lint) */ 82 83void fldcw(u_short cw); 84void fnclex(void); 85void fninit(void); 86void fnstcw(caddr_t addr); 87void fnstsw(caddr_t addr); 88void fxsave(caddr_t addr); 89void fxrstor(caddr_t addr); 90void ldmxcsr(u_int csr); 91void start_emulating(void); 92void stop_emulating(void); 93 94#endif /* __GNUCLIKE_ASM && !lint */ 95 96#define 
GET_FPU_CW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_cw) 97#define GET_FPU_SW(thread) ((thread)->td_pcb->pcb_save->sv_env.en_sw) 98 99CTASSERT(sizeof(struct savefpu) == 512); 100CTASSERT(sizeof(struct xstate_hdr) == 64); 101CTASSERT(sizeof(struct savefpu_ymm) == 832); 102 103/* 104 * This requirement is to make it easier for asm code to calculate 105 * offset of the fpu save area from the pcb address. FPU save area 106 * must by 64-bytes aligned. 107 */ 108CTASSERT(sizeof(struct pcb) % XSAVE_AREA_ALIGN == 0); 109 110static void fpu_clean_state(void); 111 112SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, 113 NULL, 1, "Floating point instructions executed in hardware"); 114 115int use_xsave; /* non-static for cpu_switch.S */ 116uint64_t xsave_mask; /* the same */ 117static struct savefpu *fpu_initialstate; 118 119void 120fpusave(void *addr) 121{ 122 123 if (use_xsave) 124 xsave((char *)addr, xsave_mask); 125 else 126 fxsave((char *)addr); 127} 128 129static void 130fpurestore(void *addr) 131{ 132 133 if (use_xsave) 134 xrstor((char *)addr, xsave_mask); 135 else 136 fxrstor((char *)addr); 137} 138 139/* 140 * Enable XSAVE if supported and allowed by user. 141 * Calculate the xsave_mask. 142 */ 143static void 144fpuinit_bsp1(void) 145{ 146 u_int cp[4]; 147 uint64_t xsave_mask_user; 148 149 if ((cpu_feature2 & CPUID2_XSAVE) != 0) { 150 use_xsave = 1; 151 TUNABLE_INT_FETCH("hw.use_xsave", &use_xsave); 152 } 153 if (!use_xsave) 154 return; 155 156 cpuid_count(0xd, 0x0, cp); 157 xsave_mask = XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 158 if ((cp[0] & xsave_mask) != xsave_mask) 159 panic("CPU0 does not support X87 or SSE: %x", cp[0]); 160 xsave_mask = ((uint64_t)cp[3] << 32) | cp[0]; 161 xsave_mask_user = xsave_mask; 162 TUNABLE_ULONG_FETCH("hw.xsave_mask", &xsave_mask_user); 163 xsave_mask_user |= XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE; 164 xsave_mask &= xsave_mask_user; 165} 166 167/* 168 * Calculate the fpu save area size. 
169 */ 170static void 171fpuinit_bsp2(void) 172{ 173 u_int cp[4]; 174 175 if (use_xsave) { 176 cpuid_count(0xd, 0x0, cp); 177 cpu_max_ext_state_size = cp[1]; 178 179 /* 180 * Reload the cpu_feature2, since we enabled OSXSAVE. 181 */ 182 do_cpuid(1, cp); 183 cpu_feature2 = cp[2]; 184 } else 185 cpu_max_ext_state_size = sizeof(struct savefpu); 186} 187 188/* 189 * Initialize the floating point unit. 190 */ 191void 192fpuinit(void) 193{ 194 register_t saveintr; 195 u_int mxcsr; 196 u_short control; 197 198 if (IS_BSP()) 199 fpuinit_bsp1(); 200 201 if (use_xsave) { 202 load_cr4(rcr4() | CR4_XSAVE); 203 xsetbv(XCR0, xsave_mask); 204 } 205 206 /* 207 * XCR0 shall be set up before CPU can report the save area size. 208 */ 209 if (IS_BSP()) 210 fpuinit_bsp2(); 211 212 /* 213 * It is too early for critical_enter() to work on AP. 214 */ 215 saveintr = intr_disable(); 216 stop_emulating(); 217 fninit(); 218 control = __INITIAL_FPUCW__; 219 fldcw(control); 220 mxcsr = __INITIAL_MXCSR__; 221 ldmxcsr(mxcsr); 222 start_emulating(); 223 intr_restore(saveintr); 224} 225 226/* 227 * On the boot CPU we generate a clean state that is used to 228 * initialize the floating point unit when it is first used by a 229 * process. 230 */ 231static void 232fpuinitstate(void *arg __unused) 233{ 234 register_t saveintr; 235 236 fpu_initialstate = malloc(cpu_max_ext_state_size, M_DEVBUF, 237 M_WAITOK | M_ZERO); 238 saveintr = intr_disable(); 239 stop_emulating(); 240 241 fpusave(fpu_initialstate); 242 if (fpu_initialstate->sv_env.en_mxcsr_mask) 243 cpu_mxcsr_mask = fpu_initialstate->sv_env.en_mxcsr_mask; 244 else 245 cpu_mxcsr_mask = 0xFFBF; 246 247 /* 248 * The fninit instruction does not modify XMM registers. The 249 * fpusave call dumped the garbage contained in the registers 250 * after reset to the initial state saved. Clear XMM 251 * registers file image to make the startup program state and 252 * signal handler XMM register content predictable. 
253 */ 254 bzero(&fpu_initialstate->sv_xmm[0], sizeof(struct xmmacc)); 255 256 start_emulating(); 257 intr_restore(saveintr); 258} 259SYSINIT(fpuinitstate, SI_SUB_DRIVERS, SI_ORDER_ANY, fpuinitstate, NULL); 260 261/* 262 * Free coprocessor (if we have it). 263 */ 264void 265fpuexit(struct thread *td) 266{ 267 268 critical_enter(); 269 if (curthread == PCPU_GET(fpcurthread)) { 270 stop_emulating(); 271 fpusave(PCPU_GET(curpcb)->pcb_save); 272 start_emulating(); 273 PCPU_SET(fpcurthread, 0); 274 } 275 critical_exit(); 276} 277 278int 279fpuformat() 280{ 281 282 return (_MC_FPFMT_XMM); 283} 284 285/* 286 * The following mechanism is used to ensure that the FPE_... value 287 * that is passed as a trapcode to the signal handler of the user 288 * process does not have more than one bit set. 289 * 290 * Multiple bits may be set if the user process modifies the control 291 * word while a status word bit is already set. While this is a sign 292 * of bad coding, we have no choise than to narrow them down to one 293 * bit, since we must not send a trapcode that is not exactly one of 294 * the FPE_ macros. 295 * 296 * The mechanism has a static table with 127 entries. Each combination 297 * of the 7 FPU status word exception bits directly translates to a 298 * position in this table, where a single FPE_... value is stored. 299 * This FPE_... value stored there is considered the "most important" 300 * of the exception bits and will be sent as the signal code. The 301 * precedence of the bits is based upon Intel Document "Numerical 302 * Applications", Chapter "Special Computational Situations". 303 * 304 * The macro to choose one of these values does these steps: 1) Throw 305 * away status word bits that cannot be masked. 2) Throw away the bits 306 * currently masked in the control word, assuming the user isn't 307 * interested in them anymore. 3) Reinsert status word bit 7 (stack 308 * fault) if it is set, which cannot be masked but must be presered. 
309 * 4) Use the remaining bits to point into the trapcode table. 310 * 311 * The 6 maskable bits in order of their preference, as stated in the 312 * above referenced Intel manual: 313 * 1 Invalid operation (FP_X_INV) 314 * 1a Stack underflow 315 * 1b Stack overflow 316 * 1c Operand of unsupported format 317 * 1d SNaN operand. 318 * 2 QNaN operand (not an exception, irrelavant here) 319 * 3 Any other invalid-operation not mentioned above or zero divide 320 * (FP_X_INV, FP_X_DZ) 321 * 4 Denormal operand (FP_X_DNML) 322 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL) 323 * 6 Inexact result (FP_X_IMP) 324 */ 325static char fpetable[128] = { 326 0, 327 FPE_FLTINV, /* 1 - INV */ 328 FPE_FLTUND, /* 2 - DNML */ 329 FPE_FLTINV, /* 3 - INV | DNML */ 330 FPE_FLTDIV, /* 4 - DZ */ 331 FPE_FLTINV, /* 5 - INV | DZ */ 332 FPE_FLTDIV, /* 6 - DNML | DZ */ 333 FPE_FLTINV, /* 7 - INV | DNML | DZ */ 334 FPE_FLTOVF, /* 8 - OFL */ 335 FPE_FLTINV, /* 9 - INV | OFL */ 336 FPE_FLTUND, /* A - DNML | OFL */ 337 FPE_FLTINV, /* B - INV | DNML | OFL */ 338 FPE_FLTDIV, /* C - DZ | OFL */ 339 FPE_FLTINV, /* D - INV | DZ | OFL */ 340 FPE_FLTDIV, /* E - DNML | DZ | OFL */ 341 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */ 342 FPE_FLTUND, /* 10 - UFL */ 343 FPE_FLTINV, /* 11 - INV | UFL */ 344 FPE_FLTUND, /* 12 - DNML | UFL */ 345 FPE_FLTINV, /* 13 - INV | DNML | UFL */ 346 FPE_FLTDIV, /* 14 - DZ | UFL */ 347 FPE_FLTINV, /* 15 - INV | DZ | UFL */ 348 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */ 349 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */ 350 FPE_FLTOVF, /* 18 - OFL | UFL */ 351 FPE_FLTINV, /* 19 - INV | OFL | UFL */ 352 FPE_FLTUND, /* 1A - DNML | OFL | UFL */ 353 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */ 354 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */ 355 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */ 356 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */ 357 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */ 358 FPE_FLTRES, /* 20 - IMP */ 359 FPE_FLTINV, /* 21 - INV | IMP */ 360 FPE_FLTUND, /* 22 - DNML | IMP */ 
361 FPE_FLTINV, /* 23 - INV | DNML | IMP */ 362 FPE_FLTDIV, /* 24 - DZ | IMP */ 363 FPE_FLTINV, /* 25 - INV | DZ | IMP */ 364 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */ 365 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */ 366 FPE_FLTOVF, /* 28 - OFL | IMP */ 367 FPE_FLTINV, /* 29 - INV | OFL | IMP */ 368 FPE_FLTUND, /* 2A - DNML | OFL | IMP */ 369 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */ 370 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */ 371 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */ 372 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */ 373 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */ 374 FPE_FLTUND, /* 30 - UFL | IMP */ 375 FPE_FLTINV, /* 31 - INV | UFL | IMP */ 376 FPE_FLTUND, /* 32 - DNML | UFL | IMP */ 377 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */ 378 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */ 379 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */ 380 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */ 381 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */ 382 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */ 383 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */ 384 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */ 385 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */ 386 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */ 387 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */ 388 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */ 389 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */ 390 FPE_FLTSUB, /* 40 - STK */ 391 FPE_FLTSUB, /* 41 - INV | STK */ 392 FPE_FLTUND, /* 42 - DNML | STK */ 393 FPE_FLTSUB, /* 43 - INV | DNML | STK */ 394 FPE_FLTDIV, /* 44 - DZ | STK */ 395 FPE_FLTSUB, /* 45 - INV | DZ | STK */ 396 FPE_FLTDIV, /* 46 - DNML | DZ | STK */ 397 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */ 398 FPE_FLTOVF, /* 48 - OFL | STK */ 399 FPE_FLTSUB, /* 49 - INV | OFL | STK */ 400 FPE_FLTUND, /* 4A - DNML | OFL | STK */ 401 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */ 402 FPE_FLTDIV, /* 4C - DZ | OFL | STK */ 403 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */ 404 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */ 
405 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */ 406 FPE_FLTUND, /* 50 - UFL | STK */ 407 FPE_FLTSUB, /* 51 - INV | UFL | STK */ 408 FPE_FLTUND, /* 52 - DNML | UFL | STK */ 409 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */ 410 FPE_FLTDIV, /* 54 - DZ | UFL | STK */ 411 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */ 412 FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */ 413 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */ 414 FPE_FLTOVF, /* 58 - OFL | UFL | STK */ 415 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */ 416 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */ 417 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */ 418 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */ 419 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */ 420 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */ 421 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */ 422 FPE_FLTRES, /* 60 - IMP | STK */ 423 FPE_FLTSUB, /* 61 - INV | IMP | STK */ 424 FPE_FLTUND, /* 62 - DNML | IMP | STK */ 425 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */ 426 FPE_FLTDIV, /* 64 - DZ | IMP | STK */ 427 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */ 428 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */ 429 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */ 430 FPE_FLTOVF, /* 68 - OFL | IMP | STK */ 431 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */ 432 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */ 433 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */ 434 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */ 435 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */ 436 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */ 437 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */ 438 FPE_FLTUND, /* 70 - UFL | IMP | STK */ 439 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */ 440 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */ 441 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */ 442 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */ 443 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */ 444 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */ 445 FPE_FLTSUB, /* 77 
- INV | DNML | DZ | UFL | IMP | STK */ 446 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */ 447 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */ 448 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */ 449 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */ 450 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */ 451 FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */ 452 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */ 453 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */ 454}; 455 456/* 457 * Preserve the FP status word, clear FP exceptions, then generate a SIGFPE. 458 * 459 * Clearing exceptions is necessary mainly to avoid IRQ13 bugs. We now 460 * depend on longjmp() restoring a usable state. Restoring the state 461 * or examining it might fail if we didn't clear exceptions. 462 * 463 * The error code chosen will be one of the FPE_... macros. It will be 464 * sent as the second argument to old BSD-style signal handlers and as 465 * "siginfo_t->si_code" (second argument) to SA_SIGINFO signal handlers. 466 * 467 * XXX the FP state is not preserved across signal handlers. So signal 468 * handlers cannot afford to do FP unless they preserve the state or 469 * longjmp() out. Both preserving the state and longjmp()ing may be 470 * destroyed by IRQ13 bugs. Clearing FP exceptions is not an acceptable 471 * solution for signals other than SIGFPE. 472 */ 473int 474fputrap() 475{ 476 u_short control, status; 477 478 critical_enter(); 479 480 /* 481 * Interrupt handling (for another interrupt) may have pushed the 482 * state to memory. Fetch the relevant parts of the state from 483 * wherever they are. 
484 */ 485 if (PCPU_GET(fpcurthread) != curthread) { 486 control = GET_FPU_CW(curthread); 487 status = GET_FPU_SW(curthread); 488 } else { 489 fnstcw(&control); 490 fnstsw(&status); 491 } 492 493 if (PCPU_GET(fpcurthread) == curthread) 494 fnclex(); 495 critical_exit(); 496 return (fpetable[status & ((~control & 0x3f) | 0x40)]); 497} 498 499/* 500 * Implement device not available (DNA) exception 501 * 502 * It would be better to switch FP context here (if curthread != fpcurthread) 503 * and not necessarily for every context switch, but it is too hard to 504 * access foreign pcb's. 505 */ 506 507static int err_count = 0; 508 509void 510fpudna(void) 511{ 512 struct pcb *pcb; 513 514 critical_enter(); 515 if (PCPU_GET(fpcurthread) == curthread) { 516 printf("fpudna: fpcurthread == curthread %d times\n", 517 ++err_count); 518 stop_emulating(); 519 critical_exit(); 520 return; 521 } 522 if (PCPU_GET(fpcurthread) != NULL) { 523 printf("fpudna: fpcurthread = %p (%d), curthread = %p (%d)\n", 524 PCPU_GET(fpcurthread), 525 PCPU_GET(fpcurthread)->td_proc->p_pid, 526 curthread, curthread->td_proc->p_pid); 527 panic("fpudna"); 528 } 529 stop_emulating(); 530 /* 531 * Record new context early in case frstor causes a trap. 532 */ 533 PCPU_SET(fpcurthread, curthread); 534 pcb = PCPU_GET(curpcb); 535 536 fpu_clean_state(); 537 538 if ((pcb->pcb_flags & PCB_FPUINITDONE) == 0) { 539 /* 540 * This is the first time this thread has used the FPU or 541 * the PCB doesn't contain a clean FPU state. Explicitly 542 * load an initial state. 
543 */ 544 fpurestore(fpu_initialstate); 545 if (pcb->pcb_initial_fpucw != __INITIAL_FPUCW__) 546 fldcw(pcb->pcb_initial_fpucw); 547 if (PCB_USER_FPU(pcb)) 548 set_pcb_flags(pcb, 549 PCB_FPUINITDONE | PCB_USERFPUINITDONE); 550 else 551 set_pcb_flags(pcb, PCB_FPUINITDONE); 552 } else 553 fpurestore(pcb->pcb_save); 554 critical_exit(); 555} 556 557void 558fpudrop() 559{ 560 struct thread *td; 561 562 td = PCPU_GET(fpcurthread); 563 KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread")); 564 CRITICAL_ASSERT(td); 565 PCPU_SET(fpcurthread, NULL); 566 clear_pcb_flags(td->td_pcb, PCB_FPUINITDONE); 567 start_emulating(); 568} 569 570/* 571 * Get the user state of the FPU into pcb->pcb_user_save without 572 * dropping ownership (if possible). It returns the FPU ownership 573 * status. 574 */ 575int 576fpugetregs(struct thread *td) 577{ 578 struct pcb *pcb; 579 580 pcb = td->td_pcb; 581 if ((pcb->pcb_flags & PCB_USERFPUINITDONE) == 0) { 582 bcopy(fpu_initialstate, get_pcb_user_save_pcb(pcb), 583 cpu_max_ext_state_size); 584 get_pcb_user_save_pcb(pcb)->sv_env.en_cw = 585 pcb->pcb_initial_fpucw; 586 fpuuserinited(td); 587 return (_MC_FPOWNED_PCB); 588 } 589 critical_enter(); 590 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 591 fpusave(get_pcb_user_save_pcb(pcb)); 592 critical_exit(); 593 return (_MC_FPOWNED_FPU); 594 } else { 595 critical_exit(); 596 return (_MC_FPOWNED_PCB); 597 } 598} 599 600void 601fpuuserinited(struct thread *td) 602{ 603 struct pcb *pcb; 604 605 pcb = td->td_pcb; 606 if (PCB_USER_FPU(pcb)) 607 set_pcb_flags(pcb, 608 PCB_FPUINITDONE | PCB_USERFPUINITDONE); 609 else 610 set_pcb_flags(pcb, PCB_FPUINITDONE); 611} 612 613int 614fpusetxstate(struct thread *td, char *xfpustate, size_t xfpustate_size) 615{ 616 struct xstate_hdr *hdr, *ehdr; 617 size_t len, max_len; 618 uint64_t bv; 619 620 /* XXXKIB should we clear all extended state in xstate_bv instead ? 
*/ 621 if (xfpustate == NULL) 622 return (0); 623 if (!use_xsave) 624 return (EOPNOTSUPP); 625 626 len = xfpustate_size; 627 if (len < sizeof(struct xstate_hdr)) 628 return (EINVAL); 629 max_len = cpu_max_ext_state_size - sizeof(struct savefpu); 630 if (len > max_len) 631 return (EINVAL); 632 633 ehdr = (struct xstate_hdr *)xfpustate; 634 bv = ehdr->xstate_bv; 635 636 /* 637 * Avoid #gp. 638 */ 639 if (bv & ~xsave_mask) 640 return (EINVAL); 641 if ((bv & (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) != 642 (XFEATURE_ENABLED_X87 | XFEATURE_ENABLED_SSE)) 643 return (EINVAL); 644 645 hdr = (struct xstate_hdr *)(get_pcb_user_save_td(td) + 1); 646 647 hdr->xstate_bv = bv; 648 bcopy(xfpustate + sizeof(struct xstate_hdr), 649 (char *)(hdr + 1), len - sizeof(struct xstate_hdr)); 650 651 return (0); 652} 653 654/* 655 * Set the state of the FPU. 656 */ 657int 658fpusetregs(struct thread *td, struct savefpu *addr, char *xfpustate, 659 size_t xfpustate_size) 660{ 661 struct pcb *pcb; 662 int error; 663 664 pcb = td->td_pcb; 665 critical_enter(); 666 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) { 667 error = fpusetxstate(td, xfpustate, xfpustate_size); 668 if (error != 0) { 669 critical_exit(); 670 return (error); 671 } 672 bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); 673 fpurestore(get_pcb_user_save_td(td)); 674 critical_exit(); 675 set_pcb_flags(pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); 676 } else { 677 critical_exit(); 678 error = fpusetxstate(td, xfpustate, xfpustate_size); 679 if (error != 0) 680 return (error); 681 bcopy(addr, get_pcb_user_save_td(td), sizeof(*addr)); 682 fpuuserinited(td); 683 } 684 return (0); 685} 686 687/* 688 * On AuthenticAMD processors, the fxrstor instruction does not restore 689 * the x87's stored last instruction pointer, last data pointer, and last 690 * opcode values, except in the rare case in which the exception summary 691 * (ES) bit in the x87 status word is set to 1. 
692 * 693 * In order to avoid leaking this information across processes, we clean 694 * these values by performing a dummy load before executing fxrstor(). 695 */ 696static void 697fpu_clean_state(void) 698{ 699 static float dummy_variable = 0.0; 700 u_short status; 701 702 /* 703 * Clear the ES bit in the x87 status word if it is currently 704 * set, in order to avoid causing a fault in the upcoming load. 705 */ 706 fnstsw(&status); 707 if (status & 0x80) 708 fnclex(); 709 710 /* 711 * Load the dummy variable into the x87 stack. This mangles 712 * the x87 stack, but we don't care since we're about to call 713 * fxrstor() anyway. 714 */ 715 __asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable)); 716} 717 718/* 719 * This really sucks. We want the acpi version only, but it requires 720 * the isa_if.h file in order to get the definitions. 721 */ 722#include "opt_isa.h" 723#ifdef DEV_ISA 724#include <isa/isavar.h> 725/* 726 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI. 
727 */ 728static struct isa_pnp_id fpupnp_ids[] = { 729 { 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */ 730 { 0 } 731}; 732 733static int 734fpupnp_probe(device_t dev) 735{ 736 int result; 737 738 result = ISA_PNP_PROBE(device_get_parent(dev), dev, fpupnp_ids); 739 if (result <= 0) 740 device_quiet(dev); 741 return (result); 742} 743 744static int 745fpupnp_attach(device_t dev) 746{ 747 748 return (0); 749} 750 751static device_method_t fpupnp_methods[] = { 752 /* Device interface */ 753 DEVMETHOD(device_probe, fpupnp_probe), 754 DEVMETHOD(device_attach, fpupnp_attach), 755 DEVMETHOD(device_detach, bus_generic_detach), 756 DEVMETHOD(device_shutdown, bus_generic_shutdown), 757 DEVMETHOD(device_suspend, bus_generic_suspend), 758 DEVMETHOD(device_resume, bus_generic_resume), 759 760 { 0, 0 } 761}; 762 763static driver_t fpupnp_driver = { 764 "fpupnp", 765 fpupnp_methods, 766 1, /* no softc */ 767}; 768 769static devclass_t fpupnp_devclass; 770 771DRIVER_MODULE(fpupnp, acpi, fpupnp_driver, fpupnp_devclass, 0, 0); 772#endif /* DEV_ISA */ 773 774static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx", 775 "Kernel contexts for FPU state"); 776 777#define FPU_KERN_CTX_FPUINITDONE 0x01 778 779struct fpu_kern_ctx { 780 struct savefpu *prev; 781 uint32_t flags; 782 char hwstate1[]; 783}; 784 785struct fpu_kern_ctx * 786fpu_kern_alloc_ctx(u_int flags) 787{ 788 struct fpu_kern_ctx *res; 789 size_t sz; 790 791 sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN + 792 cpu_max_ext_state_size; 793 res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ? 794 M_NOWAIT : M_WAITOK) | M_ZERO); 795 return (res); 796} 797 798void 799fpu_kern_free_ctx(struct fpu_kern_ctx *ctx) 800{ 801 802 /* XXXKIB clear the memory ? 
*/ 803 free(ctx, M_FPUKERN_CTX); 804} 805 806static struct savefpu * 807fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx) 808{ 809 vm_offset_t p; 810 811 p = (vm_offset_t)&ctx->hwstate1; 812 p = roundup2(p, XSAVE_AREA_ALIGN); 813 return ((struct savefpu *)p); 814} 815 816int 817fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) 818{ 819 struct pcb *pcb; 820 821 pcb = td->td_pcb; 822 KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == 823 get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); 824 ctx->flags = 0; 825 if ((pcb->pcb_flags & PCB_FPUINITDONE) != 0) 826 ctx->flags |= FPU_KERN_CTX_FPUINITDONE; 827 fpuexit(td); 828 ctx->prev = pcb->pcb_save; 829 pcb->pcb_save = fpu_kern_ctx_savefpu(ctx); 830 set_pcb_flags(pcb, PCB_KERNFPU); 831 clear_pcb_flags(pcb, PCB_FPUINITDONE); 832 return (0); 833} 834 835int 836fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) 837{ 838 struct pcb *pcb; 839 840 pcb = td->td_pcb; 841 critical_enter(); 842 if (curthread == PCPU_GET(fpcurthread)) 843 fpudrop(); 844 critical_exit(); 845 pcb->pcb_save = ctx->prev; 846 if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { 847 if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { 848 set_pcb_flags(pcb, PCB_FPUINITDONE); 849 clear_pcb_flags(pcb, PCB_KERNFPU); 850 } else 851 clear_pcb_flags(pcb, PCB_FPUINITDONE | PCB_KERNFPU); 852 } else { 853 if ((ctx->flags & FPU_KERN_CTX_FPUINITDONE) != 0) 854 set_pcb_flags(pcb, PCB_FPUINITDONE); 855 else 856 clear_pcb_flags(pcb, PCB_FPUINITDONE); 857 KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave")); 858 } 859 return (0); 860} 861 862int 863fpu_kern_thread(u_int flags) 864{ 865 struct pcb *pcb; 866 867 pcb = PCPU_GET(curpcb); 868 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0, 869 ("Only kthread may use fpu_kern_thread")); 870 KASSERT(pcb->pcb_save == get_pcb_user_save_pcb(pcb), 871 ("mangled pcb_save")); 872 KASSERT(PCB_USER_FPU(pcb), ("recursive call")); 873 874 set_pcb_flags(pcb, PCB_KERNFPU); 875 return (0); 876} 877 878int 
879is_fpu_kern_thread(u_int flags) 880{ 881 882 if ((curthread->td_pflags & TDP_KTHREAD) == 0) 883 return (0); 884 return ((PCPU_GET(curpcb)->pcb_flags & PCB_KERNFPU) != 0); 885} 886