Deleted Added
full compact
npx.c (267767) npx.c (270850)
1/*-
2 * Copyright (c) 1990 William Jolitz.
3 * Copyright (c) 1991 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91
31 */
32
33#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 1990 William Jolitz.
3 * Copyright (c) 1991 The Regents of the University of California.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 4. Neither the name of the University nor the names of its contributors
15 * may be used to endorse or promote products derived from this software
16 * without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * from: @(#)npx.c 7.2 (Berkeley) 5/12/91
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/i386/isa/npx.c 267767 2014-06-23 07:37:54Z kib $");
34__FBSDID("$FreeBSD: head/sys/i386/isa/npx.c 270850 2014-08-30 17:48:38Z jhb $");
35
36#include "opt_cpu.h"
37#include "opt_isa.h"
38#include "opt_npx.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/bus.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/module.h>
47#include <sys/mutex.h>
48#include <sys/mutex.h>
49#include <sys/proc.h>
50#include <sys/smp.h>
51#include <sys/sysctl.h>
52#include <machine/bus.h>
53#include <sys/rman.h>
54#ifdef NPX_DEBUG
55#include <sys/syslog.h>
56#endif
57#include <sys/signalvar.h>
58
59#include <machine/asmacros.h>
60#include <machine/cputypes.h>
61#include <machine/frame.h>
62#include <machine/md_var.h>
63#include <machine/pcb.h>
64#include <machine/psl.h>
65#include <machine/resource.h>
66#include <machine/specialreg.h>
67#include <machine/segments.h>
68#include <machine/ucontext.h>
69
70#include <machine/intr_machdep.h>
71#ifdef XEN
72#include <xen/xen-os.h>
73#include <xen/hypervisor.h>
74#endif
75
76#ifdef DEV_ISA
77#include <isa/isavar.h>
78#endif
79
80#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
81#define CPU_ENABLE_SSE
82#endif
83
84/*
85 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
86 */
87
88#if defined(__GNUCLIKE_ASM) && !defined(lint)
89
90#define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw))
91#define fnclex() __asm __volatile("fnclex")
92#define fninit() __asm __volatile("fninit")
93#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr)))
94#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
95#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr)))
96#define fp_divide_by_0() __asm __volatile( \
97 "fldz; fld1; fdiv %st,%st(1); fnop")
98#define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr)))
99#ifdef CPU_ENABLE_SSE
100#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
101#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
102#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr)))
103#endif
104#else /* !(__GNUCLIKE_ASM && !lint) */
105
106void fldcw(u_short cw);
107void fnclex(void);
108void fninit(void);
109void fnsave(caddr_t addr);
110void fnstcw(caddr_t addr);
111void fnstsw(caddr_t addr);
112void fp_divide_by_0(void);
113void frstor(caddr_t addr);
114#ifdef CPU_ENABLE_SSE
115void fxsave(caddr_t addr);
116void fxrstor(caddr_t addr);
117void stmxcsr(u_int *csr);
118#endif
119
120#endif /* __GNUCLIKE_ASM && !lint */
121
122#ifdef XEN
123#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
124#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
125#else
126#define start_emulating() load_cr0(rcr0() | CR0_TS)
127#define stop_emulating() clts()
128#endif
129
130#ifdef CPU_ENABLE_SSE
131#define GET_FPU_CW(thread) \
132 (cpu_fxsr ? \
133 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
134 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
135#define GET_FPU_SW(thread) \
136 (cpu_fxsr ? \
137 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
138 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
139#define SET_FPU_CW(savefpu, value) do { \
140 if (cpu_fxsr) \
141 (savefpu)->sv_xmm.sv_env.en_cw = (value); \
142 else \
143 (savefpu)->sv_87.sv_env.en_cw = (value); \
144} while (0)
145#else /* CPU_ENABLE_SSE */
146#define GET_FPU_CW(thread) \
147 (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw)
148#define GET_FPU_SW(thread) \
149 (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw)
150#define SET_FPU_CW(savefpu, value) \
151 (savefpu)->sv_87.sv_env.en_cw = (value)
152#endif /* CPU_ENABLE_SSE */
153
154typedef u_char bool_t;
155
156#ifdef CPU_ENABLE_SSE
157static void fpu_clean_state(void);
158#endif
159
160static void fpusave(union savefpu *);
161static void fpurstor(union savefpu *);
162static int npx_attach(device_t dev);
163static void npx_identify(driver_t *driver, device_t parent);
164static int npx_probe(device_t dev);
165
166int hw_float;
167
168SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
169 &hw_float, 0, "Floating point instructions executed in hardware");
170
171static volatile u_int npx_traps_while_probing;
172static union savefpu npx_initialstate;
173
174alias_for_inthand_t probetrap;
175__asm(" \n\
176 .text \n\
177 .p2align 2,0x90 \n\
178 .type " __XSTRING(CNAME(probetrap)) ",@function \n\
179" __XSTRING(CNAME(probetrap)) ": \n\
180 ss \n\
181 incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\
182 fnclex \n\
183 iret \n\
184");
185
186/*
187 * Identify routine. Create a connection point on our parent for probing.
188 */
189static void
190npx_identify(driver, parent)
191 driver_t *driver;
192 device_t parent;
193{
194 device_t child;
195
196 child = BUS_ADD_CHILD(parent, 0, "npx", 0);
197 if (child == NULL)
198 panic("npx_identify");
199}
200
201/*
202 * Probe routine. Set flags to tell npxattach() what to do. Set up an
203 * interrupt handler if npx needs to use interrupts.
204 */
static int
npx_probe(device_t dev)
{
	struct gate_descriptor save_idt_npxtrap;
	u_short control, status;

	device_set_desc(dev, "math processor");

	/*
	 * Modern CPUs all have an FPU that uses the INT16 interface
	 * and provide a simple way to verify that, so handle the
	 * common case right away.
	 */
	if (cpu_feature & CPUID_FPU) {
		hw_float = 1;
		device_quiet(dev);
		return (0);
	}

	/* Temporarily steer #MF faults to the counting probetrap handler. */
	save_idt_npxtrap = idt[IDT_MF];
	setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	/*
	 * Don't trap while we're probing.
	 */
	stop_emulating();

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but npx_intr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();

	/*
	 * Don't use fwait here because it might hang.
	 * Don't use fnop here because it usually hangs if there is no FPU.
	 */
	DELAY(1000);		/* wait for any IRQ13 */
#ifdef DIAGNOSTIC
	if (npx_traps_while_probing != 0)
		printf("fninit caused %u bogus npx trap(s)\n",
		    npx_traps_while_probing);
#endif
	/*
	 * Check for a status of mostly zero.  The variable is seeded with
	 * a pattern that cannot pass the mask test, so a missing FPU
	 * (fnstsw storing nothing) is detected.
	 */
	status = 0x5a5a;
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(control);
#ifdef FPU_ERROR_BROKEN
			/*
			 * FPU error signal doesn't work on some CPU
			 * accelerator board.
			 */
			hw_float = 1;
			return (0);
#endif
			npx_traps_while_probing = 0;
			fp_divide_by_0();
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				hw_float = 1;
				goto cleanup;
			}
			device_printf(dev,
	"FPU does not use exception 16 for error reporting\n");
			goto cleanup;
		}
	}

	/*
	 * Probe failed.  Floating point simply won't work.
	 * Notify user and disable FPU/MMX/SSE instruction execution.
	 */
	device_printf(dev, "WARNING: no FPU!\n");
	__asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
	    "n" (CR0_EM | CR0_MP) : "ax");

cleanup:
	/* Restore the original #MF gate before returning. */
	idt[IDT_MF] = save_idt_npxtrap;
	return (hw_float ? 0 : ENXIO);
}
304
305/*
306 * Attach routine - announce which it is, and wire into system
307 */
static int
npx_attach(device_t dev)
{

	npxinit();
	critical_enter();
	stop_emulating();
	/* Snapshot the just-initialized FPU state as the per-thread template. */
	fpusave(&npx_initialstate);
	start_emulating();
#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr) {
		/*
		 * Use the MXCSR mask reported by fxsave; a report of 0
		 * means the default mask 0xFFBF applies (per Intel SDM).
		 */
		if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask)
			cpu_mxcsr_mask =
			    npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask;
		else
			cpu_mxcsr_mask = 0xFFBF;
		/* Clear register contents so the template holds no junk. */
		bzero(npx_initialstate.sv_xmm.sv_fp,
		    sizeof(npx_initialstate.sv_xmm.sv_fp));
		bzero(npx_initialstate.sv_xmm.sv_xmm,
		    sizeof(npx_initialstate.sv_xmm.sv_xmm));
		/* XXX might need even more zeroing. */
	} else
#endif
		bzero(npx_initialstate.sv_87.sv_ac,
		    sizeof(npx_initialstate.sv_87.sv_ac));
	critical_exit();

	return (0);
}
337
338/*
339 * Initialize floating point unit.
340 */
void
npxinit(void)
{
	static union savefpu dummy;
	register_t saveintr;
	u_short control;

	if (!hw_float)
		return;
	/*
	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
	 * fnsave to throw away any junk in the fpu.  npxsave() initializes
	 * the fpu and sets fpcurthread = NULL as important side effects.
	 *
	 * It is too early for critical_enter() to work on AP.
	 */
	saveintr = intr_disable();
	npxsave(&dummy);
	stop_emulating();
#ifdef CPU_ENABLE_SSE
	/* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
	if (cpu_fxsr)
		fninit();
#endif
	/* Install the system default control word. */
	control = __INITIAL_NPXCW__;
	fldcw(control);
	start_emulating();
	intr_restore(saveintr);
}
370
371/*
372 * Free coprocessor (if we have it).
373 */
374void
375npxexit(td)
376 struct thread *td;
377{
378
379 critical_enter();
380 if (curthread == PCPU_GET(fpcurthread))
381 npxsave(curpcb->pcb_save);
382 critical_exit();
383#ifdef NPX_DEBUG
384 if (hw_float) {
385 u_int masked_exceptions;
386
387 masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
388 /*
389 * Log exceptions that would have trapped with the old
390 * control word (overflow, divide by 0, and invalid operand).
391 */
392 if (masked_exceptions & 0x0d)
393 log(LOG_ERR,
394 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
395 td->td_proc->p_pid, td->td_proc->p_comm,
396 masked_exceptions);
397 }
398#endif
399}
400
401int
402npxformat()
403{
404
405 if (!hw_float)
406 return (_MC_FPFMT_NODEV);
407#ifdef CPU_ENABLE_SSE
408 if (cpu_fxsr)
409 return (_MC_FPFMT_XMM);
410#endif
411 return (_MC_FPFMT_387);
412}
413
414/*
415 * The following mechanism is used to ensure that the FPE_... value
416 * that is passed as a trapcode to the signal handler of the user
417 * process does not have more than one bit set.
418 *
419 * Multiple bits may be set if the user process modifies the control
420 * word while a status word bit is already set. While this is a sign
 * of bad coding, we have no choice but to narrow them down to one
422 * bit, since we must not send a trapcode that is not exactly one of
423 * the FPE_ macros.
424 *
425 * The mechanism has a static table with 127 entries. Each combination
426 * of the 7 FPU status word exception bits directly translates to a
427 * position in this table, where a single FPE_... value is stored.
428 * This FPE_... value stored there is considered the "most important"
429 * of the exception bits and will be sent as the signal code. The
430 * precedence of the bits is based upon Intel Document "Numerical
431 * Applications", Chapter "Special Computational Situations".
432 *
433 * The macro to choose one of these values does these steps: 1) Throw
434 * away status word bits that cannot be masked. 2) Throw away the bits
435 * currently masked in the control word, assuming the user isn't
436 * interested in them anymore. 3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
438 * 4) Use the remaining bits to point into the trapcode table.
439 *
440 * The 6 maskable bits in order of their preference, as stated in the
441 * above referenced Intel manual:
442 * 1 Invalid operation (FP_X_INV)
443 * 1a Stack underflow
444 * 1b Stack overflow
445 * 1c Operand of unsupported format
446 * 1d SNaN operand.
 * 2	QNaN operand (not an exception, irrelevant here)
448 * 3 Any other invalid-operation not mentioned above or zero divide
449 * (FP_X_INV, FP_X_DZ)
450 * 4 Denormal operand (FP_X_DNML)
451 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
452 * 6 Inexact result (FP_X_IMP)
453 */
/*
 * Trapcode table: indexed by the 6 maskable unmasked-exception bits
 * plus the stack-fault bit (0x40); see npxtrap_x87()/npxtrap_sse().
 */
static char fpetable[128] = {
	0,
	FPE_FLTINV,	/* 1 - INV */
	FPE_FLTUND,	/* 2 - DNML */
	FPE_FLTINV,	/* 3 - INV | DNML */
	FPE_FLTDIV,	/* 4 - DZ */
	FPE_FLTINV,	/* 5 - INV | DZ */
	FPE_FLTDIV,	/* 6 - DNML | DZ */
	FPE_FLTINV,	/* 7 - INV | DNML | DZ */
	FPE_FLTOVF,	/* 8 - OFL */
	FPE_FLTINV,	/* 9 - INV | OFL */
	FPE_FLTUND,	/* A - DNML | OFL */
	FPE_FLTINV,	/* B - INV | DNML | OFL */
	FPE_FLTDIV,	/* C - DZ | OFL */
	FPE_FLTINV,	/* D - INV | DZ | OFL */
	FPE_FLTDIV,	/* E - DNML | DZ | OFL */
	FPE_FLTINV,	/* F - INV | DNML | DZ | OFL */
	FPE_FLTUND,	/* 10 - UFL */
	FPE_FLTINV,	/* 11 - INV | UFL */
	FPE_FLTUND,	/* 12 - DNML | UFL */
	FPE_FLTINV,	/* 13 - INV | DNML | UFL */
	FPE_FLTDIV,	/* 14 - DZ | UFL */
	FPE_FLTINV,	/* 15 - INV | DZ | UFL */
	FPE_FLTDIV,	/* 16 - DNML | DZ | UFL */
	FPE_FLTINV,	/* 17 - INV | DNML | DZ | UFL */
	FPE_FLTOVF,	/* 18 - OFL | UFL */
	FPE_FLTINV,	/* 19 - INV | OFL | UFL */
	FPE_FLTUND,	/* 1A - DNML | OFL | UFL */
	FPE_FLTINV,	/* 1B - INV | DNML | OFL | UFL */
	FPE_FLTDIV,	/* 1C - DZ | OFL | UFL */
	FPE_FLTINV,	/* 1D - INV | DZ | OFL | UFL */
	FPE_FLTDIV,	/* 1E - DNML | DZ | OFL | UFL */
	FPE_FLTINV,	/* 1F - INV | DNML | DZ | OFL | UFL */
	FPE_FLTRES,	/* 20 - IMP */
	FPE_FLTINV,	/* 21 - INV | IMP */
	FPE_FLTUND,	/* 22 - DNML | IMP */
	FPE_FLTINV,	/* 23 - INV | DNML | IMP */
	FPE_FLTDIV,	/* 24 - DZ | IMP */
	FPE_FLTINV,	/* 25 - INV | DZ | IMP */
	FPE_FLTDIV,	/* 26 - DNML | DZ | IMP */
	FPE_FLTINV,	/* 27 - INV | DNML | DZ | IMP */
	FPE_FLTOVF,	/* 28 - OFL | IMP */
	FPE_FLTINV,	/* 29 - INV | OFL | IMP */
	FPE_FLTUND,	/* 2A - DNML | OFL | IMP */
	FPE_FLTINV,	/* 2B - INV | DNML | OFL | IMP */
	FPE_FLTDIV,	/* 2C - DZ | OFL | IMP */
	FPE_FLTINV,	/* 2D - INV | DZ | OFL | IMP */
	FPE_FLTDIV,	/* 2E - DNML | DZ | OFL | IMP */
	FPE_FLTINV,	/* 2F - INV | DNML | DZ | OFL | IMP */
	FPE_FLTUND,	/* 30 - UFL | IMP */
	FPE_FLTINV,	/* 31 - INV | UFL | IMP */
	FPE_FLTUND,	/* 32 - DNML | UFL | IMP */
	FPE_FLTINV,	/* 33 - INV | DNML | UFL | IMP */
	FPE_FLTDIV,	/* 34 - DZ | UFL | IMP */
	FPE_FLTINV,	/* 35 - INV | DZ | UFL | IMP */
	FPE_FLTDIV,	/* 36 - DNML | DZ | UFL | IMP */
	FPE_FLTINV,	/* 37 - INV | DNML | DZ | UFL | IMP */
	FPE_FLTOVF,	/* 38 - OFL | UFL | IMP */
	FPE_FLTINV,	/* 39 - INV | OFL | UFL | IMP */
	FPE_FLTUND,	/* 3A - DNML | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3B - INV | DNML | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3C - DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3D - INV | DZ | OFL | UFL | IMP */
	FPE_FLTDIV,	/* 3E - DNML | DZ | OFL | UFL | IMP */
	FPE_FLTINV,	/* 3F - INV | DNML | DZ | OFL | UFL | IMP */
	FPE_FLTSUB,	/* 40 - STK */
	FPE_FLTSUB,	/* 41 - INV | STK */
	FPE_FLTUND,	/* 42 - DNML | STK */
	FPE_FLTSUB,	/* 43 - INV | DNML | STK */
	FPE_FLTDIV,	/* 44 - DZ | STK */
	FPE_FLTSUB,	/* 45 - INV | DZ | STK */
	FPE_FLTDIV,	/* 46 - DNML | DZ | STK */
	FPE_FLTSUB,	/* 47 - INV | DNML | DZ | STK */
	FPE_FLTOVF,	/* 48 - OFL | STK */
	FPE_FLTSUB,	/* 49 - INV | OFL | STK */
	FPE_FLTUND,	/* 4A - DNML | OFL | STK */
	FPE_FLTSUB,	/* 4B - INV | DNML | OFL | STK */
	FPE_FLTDIV,	/* 4C - DZ | OFL | STK */
	FPE_FLTSUB,	/* 4D - INV | DZ | OFL | STK */
	FPE_FLTDIV,	/* 4E - DNML | DZ | OFL | STK */
	FPE_FLTSUB,	/* 4F - INV | DNML | DZ | OFL | STK */
	FPE_FLTUND,	/* 50 - UFL | STK */
	FPE_FLTSUB,	/* 51 - INV | UFL | STK */
	FPE_FLTUND,	/* 52 - DNML | UFL | STK */
	FPE_FLTSUB,	/* 53 - INV | DNML | UFL | STK */
	FPE_FLTDIV,	/* 54 - DZ | UFL | STK */
	FPE_FLTSUB,	/* 55 - INV | DZ | UFL | STK */
	FPE_FLTDIV,	/* 56 - DNML | DZ | UFL | STK */
	FPE_FLTSUB,	/* 57 - INV | DNML | DZ | UFL | STK */
	FPE_FLTOVF,	/* 58 - OFL | UFL | STK */
	FPE_FLTSUB,	/* 59 - INV | OFL | UFL | STK */
	FPE_FLTUND,	/* 5A - DNML | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5B - INV | DNML | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5C - DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5D - INV | DZ | OFL | UFL | STK */
	FPE_FLTDIV,	/* 5E - DNML | DZ | OFL | UFL | STK */
	FPE_FLTSUB,	/* 5F - INV | DNML | DZ | OFL | UFL | STK */
	FPE_FLTRES,	/* 60 - IMP | STK */
	FPE_FLTSUB,	/* 61 - INV | IMP | STK */
	FPE_FLTUND,	/* 62 - DNML | IMP | STK */
	FPE_FLTSUB,	/* 63 - INV | DNML | IMP | STK */
	FPE_FLTDIV,	/* 64 - DZ | IMP | STK */
	FPE_FLTSUB,	/* 65 - INV | DZ | IMP | STK */
	FPE_FLTDIV,	/* 66 - DNML | DZ | IMP | STK */
	FPE_FLTSUB,	/* 67 - INV | DNML | DZ | IMP | STK */
	FPE_FLTOVF,	/* 68 - OFL | IMP | STK */
	FPE_FLTSUB,	/* 69 - INV | OFL | IMP | STK */
	FPE_FLTUND,	/* 6A - DNML | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6B - INV | DNML | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6C - DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6D - INV | DZ | OFL | IMP | STK */
	FPE_FLTDIV,	/* 6E - DNML | DZ | OFL | IMP | STK */
	FPE_FLTSUB,	/* 6F - INV | DNML | DZ | OFL | IMP | STK */
	FPE_FLTUND,	/* 70 - UFL | IMP | STK */
	FPE_FLTSUB,	/* 71 - INV | UFL | IMP | STK */
	FPE_FLTUND,	/* 72 - DNML | UFL | IMP | STK */
	FPE_FLTSUB,	/* 73 - INV | DNML | UFL | IMP | STK */
	FPE_FLTDIV,	/* 74 - DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 75 - INV | DZ | UFL | IMP | STK */
	FPE_FLTDIV,	/* 76 - DNML | DZ | UFL | IMP | STK */
	FPE_FLTSUB,	/* 77 - INV | DNML | DZ | UFL | IMP | STK */
	FPE_FLTOVF,	/* 78 - OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 79 - INV | OFL | UFL | IMP | STK */
	FPE_FLTUND,	/* 7A - DNML | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7B - INV | DNML | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7C - DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7D - INV | DZ | OFL | UFL | IMP | STK */
	FPE_FLTDIV,	/* 7E - DNML | DZ | OFL | UFL | IMP | STK */
	FPE_FLTSUB,	/* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
};
584
585/*
586 * Read the FP status and control words, then generate si_code value
587 * for SIGFPE. The error code chosen will be one of the
588 * FPE_... macros. It will be sent as the second argument to old
589 * BSD-style signal handlers and as "siginfo_t->si_code" (second
590 * argument) to SA_SIGINFO signal handlers.
591 *
592 * Some time ago, we cleared the x87 exceptions with FNCLEX there.
593 * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
594 * usermode code which understands the FPU hardware enough to enable
595 * the exceptions, can also handle clearing the exception state in the
596 * handler. The only consequence of not clearing the exception is the
597 * rethrow of the SIGFPE on return from the signal handler and
598 * reexecution of the corresponding instruction.
599 *
600 * For XMM traps, the exceptions were never cleared.
601 */
int
npxtrap_x87(void)
{
	u_short control, status;

	if (!hw_float) {
		printf(
	"npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();

	/*
	 * Interrupt handling (for another interrupt) may have pushed the
	 * state to memory.  Fetch the relevant parts of the state from
	 * wherever they are.
	 */
	if (PCPU_GET(fpcurthread) != curthread) {
		control = GET_FPU_CW(curthread);
		status = GET_FPU_SW(curthread);
	} else {
		fnstcw(&control);
		fnstsw(&status);
	}
	critical_exit();
	/*
	 * Select the unmasked exception bits, always keeping the stack
	 * fault bit (0x40), and map them to a single FPE_* code.
	 */
	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
630
631#ifdef CPU_ENABLE_SSE
int
npxtrap_sse(void)
{
	u_int mxcsr;

	if (!hw_float) {
		printf(
	"npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();
	/* Read the live MXCSR only if this thread owns the FPU. */
	if (PCPU_GET(fpcurthread) != curthread)
		mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
	else
		stmxcsr(&mxcsr);
	critical_exit();
	/*
	 * MXCSR mask bits sit 7 bits above the corresponding flag bits;
	 * keep only the unmasked exception flags for the table lookup.
	 */
	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}
651#endif
652
653/*
654 * Implement device not available (DNA) exception
655 *
656 * It would be better to switch FP context here (if curthread != fpcurthread)
657 * and not necessarily for every context switch, but it is too hard to
658 * access foreign pcb's.
659 */
660
static int err_count = 0;	/* DNA traps taken while already owning the FPU */

int
npxdna(void)
{

	if (!hw_float)
		return (0);
	critical_enter();
	if (PCPU_GET(fpcurthread) == curthread) {
		/* Spurious trap: we already own the FPU; just unmask it. */
		printf("npxdna: fpcurthread == curthread %d times\n",
		    ++err_count);
		stop_emulating();
		critical_exit();
		return (1);
	}
	if (PCPU_GET(fpcurthread) != NULL) {
		/* Another thread still owns the state: should be impossible. */
		printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
		    PCPU_GET(fpcurthread),
		    PCPU_GET(fpcurthread)->td_proc->p_pid,
		    curthread, curthread->td_proc->p_pid);
		panic("npxdna");
	}
	stop_emulating();
	/*
	 * Record new context early in case frstor causes an IRQ13.
	 */
	PCPU_SET(fpcurthread, curthread);

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fpu_clean_state();
#endif

	if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/*
		 * This is the first time this thread has used the FPU or
		 * the PCB doesn't contain a clean FPU state.  Explicitly
		 * load an initial state.
		 */
		fpurstor(&npx_initialstate);
		if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
			fldcw(curpcb->pcb_initial_npxcw);
		curpcb->pcb_flags |= PCB_NPXINITDONE;
		if (PCB_USER_FPU(curpcb))
			curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
	} else {
		/*
		 * The following fpurstor() may cause an IRQ13 when the
		 * state being restored has a pending error.  The error will
		 * appear to have been triggered by the current (npx) user
		 * instruction even when that instruction is a no-wait
		 * instruction that should not trigger an error (e.g.,
		 * fnclex).  On at least one 486 system all of the no-wait
		 * instructions are broken the same as frstor, so our
		 * treatment does not amplify the breakage.  On at least
		 * one 386/Cyrix 387 system, fnclex works correctly while
		 * frstor and fnsave are broken, so our treatment breaks
		 * fnclex if it is the first FPU instruction after a context
		 * switch.
		 */
		fpurstor(curpcb->pcb_save);
	}
	critical_exit();

	return (1);
}
728
729/*
730 * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx
731 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
732 * no-wait npx instructions. See the Intel application note AP-578 for
733 * details. This doesn't cause any additional complications here. IRQ13's
734 * are inherently asynchronous unless the CPU is frozen to deliver them --
735 * one that started in userland may be delivered many instructions later,
736 * after the process has entered the kernel. It may even be delivered after
737 * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in
738 * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
739 * it is normally ignored at first because we set fpcurthread to NULL; it is
740 * normally retriggered in npxdna() after return to user mode.
741 *
742 * npxsave() must be called with interrupts disabled, so that it clears
743 * fpcurthread atomically with saving the state. We require callers to do the
744 * disabling, since most callers need to disable interrupts anyway to call
745 * npxsave() atomically with checking fpcurthread.
746 *
747 * A previous version of npxsave() went to great lengths to excecute fnsave
748 * with interrupts enabled in case executing it froze the CPU. This case
749 * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply
750 * spurious freezes.
751 */
752void
753npxsave(addr)
754 union savefpu *addr;
755{
756
757 stop_emulating();
758 fpusave(addr);
759
760 start_emulating();
761 PCPU_SET(fpcurthread, NULL);
762}
763
35
36#include "opt_cpu.h"
37#include "opt_isa.h"
38#include "opt_npx.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/bus.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/malloc.h>
46#include <sys/module.h>
47#include <sys/mutex.h>
48#include <sys/mutex.h>
49#include <sys/proc.h>
50#include <sys/smp.h>
51#include <sys/sysctl.h>
52#include <machine/bus.h>
53#include <sys/rman.h>
54#ifdef NPX_DEBUG
55#include <sys/syslog.h>
56#endif
57#include <sys/signalvar.h>
58
59#include <machine/asmacros.h>
60#include <machine/cputypes.h>
61#include <machine/frame.h>
62#include <machine/md_var.h>
63#include <machine/pcb.h>
64#include <machine/psl.h>
65#include <machine/resource.h>
66#include <machine/specialreg.h>
67#include <machine/segments.h>
68#include <machine/ucontext.h>
69
70#include <machine/intr_machdep.h>
71#ifdef XEN
72#include <xen/xen-os.h>
73#include <xen/hypervisor.h>
74#endif
75
76#ifdef DEV_ISA
77#include <isa/isavar.h>
78#endif
79
80#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
81#define CPU_ENABLE_SSE
82#endif
83
84/*
85 * 387 and 287 Numeric Coprocessor Extension (NPX) Driver.
86 */
87
88#if defined(__GNUCLIKE_ASM) && !defined(lint)
89
90#define fldcw(cw) __asm __volatile("fldcw %0" : : "m" (cw))
91#define fnclex() __asm __volatile("fnclex")
92#define fninit() __asm __volatile("fninit")
93#define fnsave(addr) __asm __volatile("fnsave %0" : "=m" (*(addr)))
94#define fnstcw(addr) __asm __volatile("fnstcw %0" : "=m" (*(addr)))
95#define fnstsw(addr) __asm __volatile("fnstsw %0" : "=am" (*(addr)))
96#define fp_divide_by_0() __asm __volatile( \
97 "fldz; fld1; fdiv %st,%st(1); fnop")
98#define frstor(addr) __asm __volatile("frstor %0" : : "m" (*(addr)))
99#ifdef CPU_ENABLE_SSE
100#define fxrstor(addr) __asm __volatile("fxrstor %0" : : "m" (*(addr)))
101#define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr)))
102#define stmxcsr(addr) __asm __volatile("stmxcsr %0" : : "m" (*(addr)))
103#endif
104#else /* !(__GNUCLIKE_ASM && !lint) */
105
106void fldcw(u_short cw);
107void fnclex(void);
108void fninit(void);
109void fnsave(caddr_t addr);
110void fnstcw(caddr_t addr);
111void fnstsw(caddr_t addr);
112void fp_divide_by_0(void);
113void frstor(caddr_t addr);
114#ifdef CPU_ENABLE_SSE
115void fxsave(caddr_t addr);
116void fxrstor(caddr_t addr);
117void stmxcsr(u_int *csr);
118#endif
119
120#endif /* __GNUCLIKE_ASM && !lint */
121
122#ifdef XEN
123#define start_emulating() (HYPERVISOR_fpu_taskswitch(1))
124#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0))
125#else
126#define start_emulating() load_cr0(rcr0() | CR0_TS)
127#define stop_emulating() clts()
128#endif
129
130#ifdef CPU_ENABLE_SSE
131#define GET_FPU_CW(thread) \
132 (cpu_fxsr ? \
133 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_cw : \
134 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_cw)
135#define GET_FPU_SW(thread) \
136 (cpu_fxsr ? \
137 (thread)->td_pcb->pcb_save->sv_xmm.sv_env.en_sw : \
138 (thread)->td_pcb->pcb_save->sv_87.sv_env.en_sw)
139#define SET_FPU_CW(savefpu, value) do { \
140 if (cpu_fxsr) \
141 (savefpu)->sv_xmm.sv_env.en_cw = (value); \
142 else \
143 (savefpu)->sv_87.sv_env.en_cw = (value); \
144} while (0)
145#else /* CPU_ENABLE_SSE */
146#define GET_FPU_CW(thread) \
147 (thread->td_pcb->pcb_save->sv_87.sv_env.en_cw)
148#define GET_FPU_SW(thread) \
149 (thread->td_pcb->pcb_save->sv_87.sv_env.en_sw)
150#define SET_FPU_CW(savefpu, value) \
151 (savefpu)->sv_87.sv_env.en_cw = (value)
152#endif /* CPU_ENABLE_SSE */
153
154typedef u_char bool_t;
155
156#ifdef CPU_ENABLE_SSE
157static void fpu_clean_state(void);
158#endif
159
160static void fpusave(union savefpu *);
161static void fpurstor(union savefpu *);
162static int npx_attach(device_t dev);
163static void npx_identify(driver_t *driver, device_t parent);
164static int npx_probe(device_t dev);
165
166int hw_float;
167
168SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD,
169 &hw_float, 0, "Floating point instructions executed in hardware");
170
171static volatile u_int npx_traps_while_probing;
172static union savefpu npx_initialstate;
173
174alias_for_inthand_t probetrap;
175__asm(" \n\
176 .text \n\
177 .p2align 2,0x90 \n\
178 .type " __XSTRING(CNAME(probetrap)) ",@function \n\
179" __XSTRING(CNAME(probetrap)) ": \n\
180 ss \n\
181 incl " __XSTRING(CNAME(npx_traps_while_probing)) " \n\
182 fnclex \n\
183 iret \n\
184");
185
186/*
187 * Identify routine. Create a connection point on our parent for probing.
188 */
189static void
190npx_identify(driver, parent)
191 driver_t *driver;
192 device_t parent;
193{
194 device_t child;
195
196 child = BUS_ADD_CHILD(parent, 0, "npx", 0);
197 if (child == NULL)
198 panic("npx_identify");
199}
200
201/*
202 * Probe routine. Set flags to tell npxattach() what to do. Set up an
203 * interrupt handler if npx needs to use interrupts.
204 */
static int
npx_probe(device_t dev)
{
	struct gate_descriptor save_idt_npxtrap;
	u_short control, status;

	device_set_desc(dev, "math processor");

	/*
	 * Modern CPUs all have an FPU that uses the INT16 interface
	 * and provide a simple way to verify that, so handle the
	 * common case right away.
	 */
	if (cpu_feature & CPUID_FPU) {
		hw_float = 1;
		device_quiet(dev);
		return (0);
	}

	/*
	 * Install the temporary probetrap handler for exception 16 so
	 * that traps raised while probing are counted, not fatal.
	 */
	save_idt_npxtrap = idt[IDT_MF];
	setidt(IDT_MF, probetrap, SDT_SYS386TGT, SEL_KPL,
	    GSEL(GCODE_SEL, SEL_KPL));

	/*
	 * Don't trap while we're probing.
	 */
	stop_emulating();

	/*
	 * Finish resetting the coprocessor, if any.  If there is an error
	 * pending, then we may get a bogus IRQ13, but npx_intr() will handle
	 * it OK.  Bogus halts have never been observed, but we enabled
	 * IRQ13 and cleared the BUSY# latch early to handle them anyway.
	 */
	fninit();

	/*
	 * Don't use fwait here because it might hang.
	 * Don't use fnop here because it usually hangs if there is no FPU.
	 */
	DELAY(1000);		/* wait for any IRQ13 */
#ifdef DIAGNOSTIC
	if (npx_traps_while_probing != 0)
		printf("fninit caused %u bogus npx trap(s)\n",
		    npx_traps_while_probing);
#endif
	/*
	 * Check for a status of mostly zero.  Seed with a recognizable
	 * pattern so a dead bus read cannot look like success.
	 */
	status = 0x5a5a;
	fnstsw(&status);
	if ((status & 0xb8ff) == 0) {
		/*
		 * Good, now check for a proper control word.
		 */
		control = 0x5a5a;
		fnstcw(&control);
		if ((control & 0x1f3f) == 0x033f) {
			/*
			 * We have an npx, now divide by 0 to see if exception
			 * 16 works.
			 */
			control &= ~(1 << 2);	/* enable divide by 0 trap */
			fldcw(control);
#ifdef FPU_ERROR_BROKEN
			/*
			 * FPU error signal doesn't work on some CPU
			 * accelerator board.
			 */
			hw_float = 1;
			return (0);
#endif
			npx_traps_while_probing = 0;
			fp_divide_by_0();
			if (npx_traps_while_probing != 0) {
				/*
				 * Good, exception 16 works.
				 */
				hw_float = 1;
				goto cleanup;
			}
			device_printf(dev,
	"FPU does not use exception 16 for error reporting\n");
			goto cleanup;
		}
	}

	/*
	 * Probe failed.  Floating point simply won't work.
	 * Notify user and disable FPU/MMX/SSE instruction execution.
	 */
	device_printf(dev, "WARNING: no FPU!\n");
	__asm __volatile("smsw %%ax; orb %0,%%al; lmsw %%ax" : :
	    "n" (CR0_EM | CR0_MP) : "ax");

cleanup:
	/* Restore the original exception 16 handler in all cases. */
	idt[IDT_MF] = save_idt_npxtrap;
	return (hw_float ? 0 : ENXIO);
}
304
305/*
306 * Attach routine - announce which it is, and wire into system
307 */
static int
npx_attach(device_t dev)
{

	npxinit();
	critical_enter();
	stop_emulating();
	/* Capture a pristine FPU state used later to seed new threads. */
	fpusave(&npx_initialstate);
	start_emulating();
#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr) {
		/*
		 * Use the MXCSR mask reported by fxsave when nonzero,
		 * otherwise fall back to the documented default mask.
		 */
		if (npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask)
			cpu_mxcsr_mask = 
			    npx_initialstate.sv_xmm.sv_env.en_mxcsr_mask;
		else
			cpu_mxcsr_mask = 0xFFBF;
		/* Scrub register contents so no junk leaks into threads. */
		bzero(npx_initialstate.sv_xmm.sv_fp,
		    sizeof(npx_initialstate.sv_xmm.sv_fp));
		bzero(npx_initialstate.sv_xmm.sv_xmm,
		    sizeof(npx_initialstate.sv_xmm.sv_xmm));
		/* XXX might need even more zeroing. */
	} else
#endif
		bzero(npx_initialstate.sv_87.sv_ac,
		    sizeof(npx_initialstate.sv_87.sv_ac));
	critical_exit();

	return (0);
}
337
338/*
339 * Initialize floating point unit.
340 */
void
npxinit(void)
{
	static union savefpu dummy;
	register_t saveintr;
	u_short control;

	if (!hw_float)
		return;
	/*
	 * fninit has the same h/w bugs as fnsave.  Use the detoxified
	 * fnsave to throw away any junk in the fpu.  npxsave() initializes
	 * the fpu and sets fpcurthread = NULL as important side effects.
	 *
	 * It is too early for critical_enter() to work on AP.
	 */
	saveintr = intr_disable();
	npxsave(&dummy);
	stop_emulating();
#ifdef CPU_ENABLE_SSE
	/* XXX npxsave() doesn't actually initialize the fpu in the SSE case. */
	if (cpu_fxsr)
		fninit();
#endif
	/* Load the system default control word. */
	control = __INITIAL_NPXCW__;
	fldcw(control);
	start_emulating();
	intr_restore(saveintr);
}
370
371/*
372 * Free coprocessor (if we have it).
373 */
374void
375npxexit(td)
376 struct thread *td;
377{
378
379 critical_enter();
380 if (curthread == PCPU_GET(fpcurthread))
381 npxsave(curpcb->pcb_save);
382 critical_exit();
383#ifdef NPX_DEBUG
384 if (hw_float) {
385 u_int masked_exceptions;
386
387 masked_exceptions = GET_FPU_CW(td) & GET_FPU_SW(td) & 0x7f;
388 /*
389 * Log exceptions that would have trapped with the old
390 * control word (overflow, divide by 0, and invalid operand).
391 */
392 if (masked_exceptions & 0x0d)
393 log(LOG_ERR,
394 "pid %d (%s) exited with masked floating point exceptions 0x%02x\n",
395 td->td_proc->p_pid, td->td_proc->p_comm,
396 masked_exceptions);
397 }
398#endif
399}
400
401int
402npxformat()
403{
404
405 if (!hw_float)
406 return (_MC_FPFMT_NODEV);
407#ifdef CPU_ENABLE_SSE
408 if (cpu_fxsr)
409 return (_MC_FPFMT_XMM);
410#endif
411 return (_MC_FPFMT_387);
412}
413
414/*
415 * The following mechanism is used to ensure that the FPE_... value
416 * that is passed as a trapcode to the signal handler of the user
417 * process does not have more than one bit set.
418 *
419 * Multiple bits may be set if the user process modifies the control
420 * word while a status word bit is already set. While this is a sign
 * of bad coding, we have no choice but to narrow them down to one
422 * bit, since we must not send a trapcode that is not exactly one of
423 * the FPE_ macros.
424 *
425 * The mechanism has a static table with 127 entries. Each combination
426 * of the 7 FPU status word exception bits directly translates to a
427 * position in this table, where a single FPE_... value is stored.
428 * This FPE_... value stored there is considered the "most important"
429 * of the exception bits and will be sent as the signal code. The
430 * precedence of the bits is based upon Intel Document "Numerical
431 * Applications", Chapter "Special Computational Situations".
432 *
433 * The macro to choose one of these values does these steps: 1) Throw
434 * away status word bits that cannot be masked. 2) Throw away the bits
435 * currently masked in the control word, assuming the user isn't
436 * interested in them anymore. 3) Reinsert status word bit 7 (stack
 * fault) if it is set, which cannot be masked but must be preserved.
438 * 4) Use the remaining bits to point into the trapcode table.
439 *
440 * The 6 maskable bits in order of their preference, as stated in the
441 * above referenced Intel manual:
442 * 1 Invalid operation (FP_X_INV)
443 * 1a Stack underflow
444 * 1b Stack overflow
445 * 1c Operand of unsupported format
446 * 1d SNaN operand.
 * 2 QNaN operand (not an exception, irrelevant here)
448 * 3 Any other invalid-operation not mentioned above or zero divide
449 * (FP_X_INV, FP_X_DZ)
450 * 4 Denormal operand (FP_X_DNML)
451 * 5 Numeric over/underflow (FP_X_OFL, FP_X_UFL)
452 * 6 Inexact result (FP_X_IMP)
453 */
454static char fpetable[128] = {
455 0,
456 FPE_FLTINV, /* 1 - INV */
457 FPE_FLTUND, /* 2 - DNML */
458 FPE_FLTINV, /* 3 - INV | DNML */
459 FPE_FLTDIV, /* 4 - DZ */
460 FPE_FLTINV, /* 5 - INV | DZ */
461 FPE_FLTDIV, /* 6 - DNML | DZ */
462 FPE_FLTINV, /* 7 - INV | DNML | DZ */
463 FPE_FLTOVF, /* 8 - OFL */
464 FPE_FLTINV, /* 9 - INV | OFL */
465 FPE_FLTUND, /* A - DNML | OFL */
466 FPE_FLTINV, /* B - INV | DNML | OFL */
467 FPE_FLTDIV, /* C - DZ | OFL */
468 FPE_FLTINV, /* D - INV | DZ | OFL */
469 FPE_FLTDIV, /* E - DNML | DZ | OFL */
470 FPE_FLTINV, /* F - INV | DNML | DZ | OFL */
471 FPE_FLTUND, /* 10 - UFL */
472 FPE_FLTINV, /* 11 - INV | UFL */
473 FPE_FLTUND, /* 12 - DNML | UFL */
474 FPE_FLTINV, /* 13 - INV | DNML | UFL */
475 FPE_FLTDIV, /* 14 - DZ | UFL */
476 FPE_FLTINV, /* 15 - INV | DZ | UFL */
477 FPE_FLTDIV, /* 16 - DNML | DZ | UFL */
478 FPE_FLTINV, /* 17 - INV | DNML | DZ | UFL */
479 FPE_FLTOVF, /* 18 - OFL | UFL */
480 FPE_FLTINV, /* 19 - INV | OFL | UFL */
481 FPE_FLTUND, /* 1A - DNML | OFL | UFL */
482 FPE_FLTINV, /* 1B - INV | DNML | OFL | UFL */
483 FPE_FLTDIV, /* 1C - DZ | OFL | UFL */
484 FPE_FLTINV, /* 1D - INV | DZ | OFL | UFL */
485 FPE_FLTDIV, /* 1E - DNML | DZ | OFL | UFL */
486 FPE_FLTINV, /* 1F - INV | DNML | DZ | OFL | UFL */
487 FPE_FLTRES, /* 20 - IMP */
488 FPE_FLTINV, /* 21 - INV | IMP */
489 FPE_FLTUND, /* 22 - DNML | IMP */
490 FPE_FLTINV, /* 23 - INV | DNML | IMP */
491 FPE_FLTDIV, /* 24 - DZ | IMP */
492 FPE_FLTINV, /* 25 - INV | DZ | IMP */
493 FPE_FLTDIV, /* 26 - DNML | DZ | IMP */
494 FPE_FLTINV, /* 27 - INV | DNML | DZ | IMP */
495 FPE_FLTOVF, /* 28 - OFL | IMP */
496 FPE_FLTINV, /* 29 - INV | OFL | IMP */
497 FPE_FLTUND, /* 2A - DNML | OFL | IMP */
498 FPE_FLTINV, /* 2B - INV | DNML | OFL | IMP */
499 FPE_FLTDIV, /* 2C - DZ | OFL | IMP */
500 FPE_FLTINV, /* 2D - INV | DZ | OFL | IMP */
501 FPE_FLTDIV, /* 2E - DNML | DZ | OFL | IMP */
502 FPE_FLTINV, /* 2F - INV | DNML | DZ | OFL | IMP */
503 FPE_FLTUND, /* 30 - UFL | IMP */
504 FPE_FLTINV, /* 31 - INV | UFL | IMP */
505 FPE_FLTUND, /* 32 - DNML | UFL | IMP */
506 FPE_FLTINV, /* 33 - INV | DNML | UFL | IMP */
507 FPE_FLTDIV, /* 34 - DZ | UFL | IMP */
508 FPE_FLTINV, /* 35 - INV | DZ | UFL | IMP */
509 FPE_FLTDIV, /* 36 - DNML | DZ | UFL | IMP */
510 FPE_FLTINV, /* 37 - INV | DNML | DZ | UFL | IMP */
511 FPE_FLTOVF, /* 38 - OFL | UFL | IMP */
512 FPE_FLTINV, /* 39 - INV | OFL | UFL | IMP */
513 FPE_FLTUND, /* 3A - DNML | OFL | UFL | IMP */
514 FPE_FLTINV, /* 3B - INV | DNML | OFL | UFL | IMP */
515 FPE_FLTDIV, /* 3C - DZ | OFL | UFL | IMP */
516 FPE_FLTINV, /* 3D - INV | DZ | OFL | UFL | IMP */
517 FPE_FLTDIV, /* 3E - DNML | DZ | OFL | UFL | IMP */
518 FPE_FLTINV, /* 3F - INV | DNML | DZ | OFL | UFL | IMP */
519 FPE_FLTSUB, /* 40 - STK */
520 FPE_FLTSUB, /* 41 - INV | STK */
521 FPE_FLTUND, /* 42 - DNML | STK */
522 FPE_FLTSUB, /* 43 - INV | DNML | STK */
523 FPE_FLTDIV, /* 44 - DZ | STK */
524 FPE_FLTSUB, /* 45 - INV | DZ | STK */
525 FPE_FLTDIV, /* 46 - DNML | DZ | STK */
526 FPE_FLTSUB, /* 47 - INV | DNML | DZ | STK */
527 FPE_FLTOVF, /* 48 - OFL | STK */
528 FPE_FLTSUB, /* 49 - INV | OFL | STK */
529 FPE_FLTUND, /* 4A - DNML | OFL | STK */
530 FPE_FLTSUB, /* 4B - INV | DNML | OFL | STK */
531 FPE_FLTDIV, /* 4C - DZ | OFL | STK */
532 FPE_FLTSUB, /* 4D - INV | DZ | OFL | STK */
533 FPE_FLTDIV, /* 4E - DNML | DZ | OFL | STK */
534 FPE_FLTSUB, /* 4F - INV | DNML | DZ | OFL | STK */
535 FPE_FLTUND, /* 50 - UFL | STK */
536 FPE_FLTSUB, /* 51 - INV | UFL | STK */
537 FPE_FLTUND, /* 52 - DNML | UFL | STK */
538 FPE_FLTSUB, /* 53 - INV | DNML | UFL | STK */
539 FPE_FLTDIV, /* 54 - DZ | UFL | STK */
540 FPE_FLTSUB, /* 55 - INV | DZ | UFL | STK */
541 FPE_FLTDIV, /* 56 - DNML | DZ | UFL | STK */
542 FPE_FLTSUB, /* 57 - INV | DNML | DZ | UFL | STK */
543 FPE_FLTOVF, /* 58 - OFL | UFL | STK */
544 FPE_FLTSUB, /* 59 - INV | OFL | UFL | STK */
545 FPE_FLTUND, /* 5A - DNML | OFL | UFL | STK */
546 FPE_FLTSUB, /* 5B - INV | DNML | OFL | UFL | STK */
547 FPE_FLTDIV, /* 5C - DZ | OFL | UFL | STK */
548 FPE_FLTSUB, /* 5D - INV | DZ | OFL | UFL | STK */
549 FPE_FLTDIV, /* 5E - DNML | DZ | OFL | UFL | STK */
550 FPE_FLTSUB, /* 5F - INV | DNML | DZ | OFL | UFL | STK */
551 FPE_FLTRES, /* 60 - IMP | STK */
552 FPE_FLTSUB, /* 61 - INV | IMP | STK */
553 FPE_FLTUND, /* 62 - DNML | IMP | STK */
554 FPE_FLTSUB, /* 63 - INV | DNML | IMP | STK */
555 FPE_FLTDIV, /* 64 - DZ | IMP | STK */
556 FPE_FLTSUB, /* 65 - INV | DZ | IMP | STK */
557 FPE_FLTDIV, /* 66 - DNML | DZ | IMP | STK */
558 FPE_FLTSUB, /* 67 - INV | DNML | DZ | IMP | STK */
559 FPE_FLTOVF, /* 68 - OFL | IMP | STK */
560 FPE_FLTSUB, /* 69 - INV | OFL | IMP | STK */
561 FPE_FLTUND, /* 6A - DNML | OFL | IMP | STK */
562 FPE_FLTSUB, /* 6B - INV | DNML | OFL | IMP | STK */
563 FPE_FLTDIV, /* 6C - DZ | OFL | IMP | STK */
564 FPE_FLTSUB, /* 6D - INV | DZ | OFL | IMP | STK */
565 FPE_FLTDIV, /* 6E - DNML | DZ | OFL | IMP | STK */
566 FPE_FLTSUB, /* 6F - INV | DNML | DZ | OFL | IMP | STK */
567 FPE_FLTUND, /* 70 - UFL | IMP | STK */
568 FPE_FLTSUB, /* 71 - INV | UFL | IMP | STK */
569 FPE_FLTUND, /* 72 - DNML | UFL | IMP | STK */
570 FPE_FLTSUB, /* 73 - INV | DNML | UFL | IMP | STK */
571 FPE_FLTDIV, /* 74 - DZ | UFL | IMP | STK */
572 FPE_FLTSUB, /* 75 - INV | DZ | UFL | IMP | STK */
573 FPE_FLTDIV, /* 76 - DNML | DZ | UFL | IMP | STK */
574 FPE_FLTSUB, /* 77 - INV | DNML | DZ | UFL | IMP | STK */
575 FPE_FLTOVF, /* 78 - OFL | UFL | IMP | STK */
576 FPE_FLTSUB, /* 79 - INV | OFL | UFL | IMP | STK */
577 FPE_FLTUND, /* 7A - DNML | OFL | UFL | IMP | STK */
578 FPE_FLTSUB, /* 7B - INV | DNML | OFL | UFL | IMP | STK */
579 FPE_FLTDIV, /* 7C - DZ | OFL | UFL | IMP | STK */
580 FPE_FLTSUB, /* 7D - INV | DZ | OFL | UFL | IMP | STK */
581 FPE_FLTDIV, /* 7E - DNML | DZ | OFL | UFL | IMP | STK */
582 FPE_FLTSUB, /* 7F - INV | DNML | DZ | OFL | UFL | IMP | STK */
583};
584
585/*
586 * Read the FP status and control words, then generate si_code value
587 * for SIGFPE. The error code chosen will be one of the
588 * FPE_... macros. It will be sent as the second argument to old
589 * BSD-style signal handlers and as "siginfo_t->si_code" (second
590 * argument) to SA_SIGINFO signal handlers.
591 *
592 * Some time ago, we cleared the x87 exceptions with FNCLEX there.
593 * Clearing exceptions was necessary mainly to avoid IRQ13 bugs. The
594 * usermode code which understands the FPU hardware enough to enable
595 * the exceptions, can also handle clearing the exception state in the
596 * handler. The only consequence of not clearing the exception is the
597 * rethrow of the SIGFPE on return from the signal handler and
598 * reexecution of the corresponding instruction.
599 *
600 * For XMM traps, the exceptions were never cleared.
601 */
int
npxtrap_x87(void)
{
	u_short control, status;

	if (!hw_float) {
		printf(
	"npxtrap_x87: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();

	/*
	 * Interrupt handling (for another interrupt) may have pushed the
	 * state to memory.  Fetch the relevant parts of the state from
	 * wherever they are.
	 */
	if (PCPU_GET(fpcurthread) != curthread) {
		control = GET_FPU_CW(curthread);
		status = GET_FPU_SW(curthread);
	} else {
		fnstcw(&control);
		fnstsw(&status);
	}
	critical_exit();
	/* Keep only unmasked exception bits, re-insert SF (0x40), look up. */
	return (fpetable[status & ((~control & 0x3f) | 0x40)]);
}
630
#ifdef CPU_ENABLE_SSE
int
npxtrap_sse(void)
{
	u_int mxcsr;

	if (!hw_float) {
		printf(
	"npxtrap_sse: fpcurthread = %p, curthread = %p, hw_float = %d\n",
		    PCPU_GET(fpcurthread), curthread, hw_float);
		panic("npxtrap from nowhere");
	}
	critical_enter();
	/* Use the in-pcb MXCSR unless this thread currently owns the FPU. */
	if (PCPU_GET(fpcurthread) != curthread)
		mxcsr = curthread->td_pcb->pcb_save->sv_xmm.sv_env.en_mxcsr;
	else
		stmxcsr(&mxcsr);
	critical_exit();
	/* MXCSR masks sit 7 bits above the flags; keep unmasked flags only. */
	return (fpetable[(mxcsr & (~mxcsr >> 7)) & 0x3f]);
}
#endif
652
653/*
654 * Implement device not available (DNA) exception
655 *
656 * It would be better to switch FP context here (if curthread != fpcurthread)
657 * and not necessarily for every context switch, but it is too hard to
658 * access foreign pcb's.
659 */
660
/* Count of unexpected DNA traps taken while already owning the FPU. */
static int err_count = 0;

int
npxdna(void)
{

	if (!hw_float)
		return (0);
	critical_enter();
	if (PCPU_GET(fpcurthread) == curthread) {
		/* Unexpected: we already own the FPU; just clear CR0.TS. */
		printf("npxdna: fpcurthread == curthread %d times\n",
		    ++err_count);
		stop_emulating();
		critical_exit();
		return (1);
	}
	if (PCPU_GET(fpcurthread) != NULL) {
		/* Another thread still owns the FPU: state machine broken. */
		printf("npxdna: fpcurthread = %p (%d), curthread = %p (%d)\n",
		    PCPU_GET(fpcurthread),
		    PCPU_GET(fpcurthread)->td_proc->p_pid,
		    curthread, curthread->td_proc->p_pid);
		panic("npxdna");
	}
	stop_emulating();
	/*
	 * Record new context early in case frstor causes an IRQ13.
	 */
	PCPU_SET(fpcurthread, curthread);

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fpu_clean_state();
#endif

	if ((curpcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/*
		 * This is the first time this thread has used the FPU or
		 * the PCB doesn't contain a clean FPU state.  Explicitly
		 * load an initial state.
		 */
		fpurstor(&npx_initialstate);
		if (curpcb->pcb_initial_npxcw != __INITIAL_NPXCW__)
			fldcw(curpcb->pcb_initial_npxcw);
		curpcb->pcb_flags |= PCB_NPXINITDONE;
		if (PCB_USER_FPU(curpcb))
			curpcb->pcb_flags |= PCB_NPXUSERINITDONE;
	} else {
		/*
		 * The following fpurstor() may cause an IRQ13 when the
		 * state being restored has a pending error.  The error will
		 * appear to have been triggered by the current (npx) user
		 * instruction even when that instruction is a no-wait
		 * instruction that should not trigger an error (e.g.,
		 * fnclex).  On at least one 486 system all of the no-wait
		 * instructions are broken the same as frstor, so our
		 * treatment does not amplify the breakage.  On at least
		 * one 386/Cyrix 387 system, fnclex works correctly while
		 * frstor and fnsave are broken, so our treatment breaks
		 * fnclex if it is the first FPU instruction after a context
		 * switch.
		 */
		fpurstor(curpcb->pcb_save);
	}
	critical_exit();

	return (1);
}
728
729/*
730 * Wrapper for fnsave instruction, partly to handle hardware bugs. When npx
731 * exceptions are reported via IRQ13, spurious IRQ13's may be triggered by
732 * no-wait npx instructions. See the Intel application note AP-578 for
733 * details. This doesn't cause any additional complications here. IRQ13's
734 * are inherently asynchronous unless the CPU is frozen to deliver them --
735 * one that started in userland may be delivered many instructions later,
736 * after the process has entered the kernel. It may even be delivered after
737 * the fnsave here completes. A spurious IRQ13 for the fnsave is handled in
738 * the same way as a very-late-arriving non-spurious IRQ13 from user mode:
739 * it is normally ignored at first because we set fpcurthread to NULL; it is
740 * normally retriggered in npxdna() after return to user mode.
741 *
742 * npxsave() must be called with interrupts disabled, so that it clears
743 * fpcurthread atomically with saving the state. We require callers to do the
744 * disabling, since most callers need to disable interrupts anyway to call
745 * npxsave() atomically with checking fpcurthread.
746 *
 * A previous version of npxsave() went to great lengths to execute fnsave
748 * with interrupts enabled in case executing it froze the CPU. This case
749 * can't happen, at least for Intel CPU/NPX's. Spurious IRQ13's don't imply
750 * spurious freezes.
751 */
752void
753npxsave(addr)
754 union savefpu *addr;
755{
756
757 stop_emulating();
758 fpusave(addr);
759
760 start_emulating();
761 PCPU_SET(fpcurthread, NULL);
762}
763
764/*
765 * Unconditionally save the current co-processor state across suspend and
766 * resume.
767 */
void
npxsuspend(union savefpu *addr)
{
	register_t cr0;

	if (!hw_float)
		return;
	if (PCPU_GET(fpcurthread) == NULL) {
		/* No current owner: hand back a clean initial state. */
		*addr = npx_initialstate;
		return;
	}
	/* Temporarily clear CR0.TS so fpusave() cannot trap. */
	cr0 = rcr0();
	clts();
	fpusave(addr);
	load_cr0(cr0);
}
784
void
npxresume(union savefpu *addr)
{
	register_t cr0;

	if (!hw_float)
		return;

	/* Reinitialize the unit, then restore the suspended state. */
	cr0 = rcr0();
	clts();
	npxinit();
	stop_emulating();
	fpurstor(addr);
	load_cr0(cr0);
}
800
801void
765npxdrop()
766{
767 struct thread *td;
768
769 /*
770 * Discard pending exceptions in the !cpu_fxsr case so that unmasked
771 * ones don't cause a panic on the next frstor.
772 */
773#ifdef CPU_ENABLE_SSE
774 if (!cpu_fxsr)
775#endif
776 fnclex();
777
778 td = PCPU_GET(fpcurthread);
779 KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
780 CRITICAL_ASSERT(td);
781 PCPU_SET(fpcurthread, NULL);
782 td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
783 start_emulating();
784}
785
786/*
787 * Get the user state of the FPU into pcb->pcb_user_save without
788 * dropping ownership (if possible). It returns the FPU ownership
789 * status.
790 */
int
npxgetregs(struct thread *td)
{
	struct pcb *pcb;

	if (!hw_float)
		return (_MC_FPOWNED_NONE);

	pcb = td->td_pcb;
	if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
		/*
		 * Thread has no live FPU state yet: synthesize the initial
		 * state, patched with the thread's initial control word.
		 */
		bcopy(&npx_initialstate, &pcb->pcb_user_save,
		    sizeof(npx_initialstate));
		SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw);
		npxuserinited(td);
		return (_MC_FPOWNED_PCB);
	}
	critical_enter();
	if (td == PCPU_GET(fpcurthread)) {
		fpusave(&pcb->pcb_user_save);
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			/*
			 * fnsave initializes the FPU and destroys whatever
			 * context it contains.  Make sure the FPU owner
			 * starts with a clean state next time.
			 */
			npxdrop();
		critical_exit();
		return (_MC_FPOWNED_FPU);
	} else {
		critical_exit();
		return (_MC_FPOWNED_PCB);
	}
}
826
827void
828npxuserinited(struct thread *td)
829{
830 struct pcb *pcb;
831
832 pcb = td->td_pcb;
833 if (PCB_USER_FPU(pcb))
834 pcb->pcb_flags |= PCB_NPXINITDONE;
835 pcb->pcb_flags |= PCB_NPXUSERINITDONE;
836}
837
838
/*
 * Install *addr as the thread's user FPU state: directly into the
 * hardware when td owns the FPU in user mode, otherwise into the pcb
 * save area for the next npxdna().
 */
void
npxsetregs(struct thread *td, union savefpu *addr)
{
	struct pcb *pcb;

	if (!hw_float)
		return;

	pcb = td->td_pcb;
	critical_enter();
	if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
#ifdef CPU_ENABLE_SSE
		if (!cpu_fxsr)
#endif
			fnclex();	/* As in npxdrop(). */
		/* Bounce through the pcb if addr lacks 16-byte alignment. */
		if (((uintptr_t)addr & 0xf) != 0) {
			bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
			fpurstor(&pcb->pcb_user_save);
		} else
			fpurstor(addr);
		critical_exit();
		pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
	} else {
		critical_exit();
		bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
		npxuserinited(td);
	}
}
867
/*
 * Save the hardware FPU state into *addr: fxsave when SSE is enabled,
 * fnsave otherwise.  (Converted from a K&R-style definition to an ANSI
 * prototype for consistency with the other definitions in this file.)
 */
static void
fpusave(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fxsave(addr);
	else
#endif
		fnsave(addr);
}
880
#ifdef CPU_ENABLE_SSE
/*
 * On AuthenticAMD processors, the fxrstor instruction does not restore
 * the x87's stored last instruction pointer, last data pointer, and last
 * opcode values, except in the rare case in which the exception summary
 * (ES) bit in the x87 status word is set to 1.
 *
 * In order to avoid leaking this information across processes, we clean
 * these values by performing a dummy load before executing fxrstor().
 */
static void
fpu_clean_state(void)
{
	static float dummy_variable = 0.0;
	u_short status;

	/*
	 * Clear the ES bit in the x87 status word if it is currently
	 * set, in order to avoid causing a fault in the upcoming load.
	 */
	fnstsw(&status);
	if (status & 0x80)
		fnclex();

	/*
	 * Load the dummy variable into the x87 stack.  This mangles
	 * the x87 stack, but we don't care since we're about to call
	 * fxrstor() anyway.
	 */
	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}
#endif /* CPU_ENABLE_SSE */
913
/*
 * Restore the hardware FPU state from *addr: fxrstor when SSE is
 * enabled, frstor otherwise.  (Converted from a K&R-style definition to
 * an ANSI prototype for consistency with the other definitions in this
 * file.)
 */
static void
fpurstor(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fxrstor(addr);
	else
#endif
		frstor(addr);
}
926
/* newbus glue for the nexus-attached npx device. */
static device_method_t npx_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify, npx_identify),
	DEVMETHOD(device_probe, npx_probe),
	DEVMETHOD(device_attach, npx_attach),
	DEVMETHOD(device_detach, bus_generic_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),

	{ 0, 0 }
};

static driver_t npx_driver = {
	"npx",
	npx_methods,
	1,			/* no softc */
};

static devclass_t npx_devclass;

/*
 * We prefer to attach to the root nexus so that the usual case (exception 16)
 * doesn't describe the processor as being `on isa'.
 */
DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
953
#ifdef DEV_ISA
/*
 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
 */
static struct isa_pnp_id npxisa_ids[] = {
	{ 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
	{ 0 }
};
962
963static int
964npxisa_probe(device_t dev)
965{
966 int result;
967 if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) {
968 device_quiet(dev);
969 }
970 return(result);
971}
972
973static int
974npxisa_attach(device_t dev)
975{
976 return (0);
977}
978
/* newbus glue for the legacy ISA/ACPI placeholder device. */
static device_method_t npxisa_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, npxisa_probe),
	DEVMETHOD(device_attach, npxisa_attach),
	DEVMETHOD(device_detach, bus_generic_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),

	{ 0, 0 }
};

static driver_t npxisa_driver = {
	"npxisa",
	npxisa_methods,
	1,			/* no softc */
};

static devclass_t npxisa_devclass;

DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
#ifndef PC98
DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
#endif
#endif /* DEV_ISA */
1004
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for FPU state");

/* Alignment required for the hardware save area. */
#define XSAVE_AREA_ALIGN	64

#define FPU_KERN_CTX_NPXINITDONE 0x01	/* saved state had PCB_NPXINITDONE */
#define FPU_KERN_CTX_DUMMY	0x02	/* nothing was saved (FPU kthread) */

/*
 * Context handed to fpu_kern_enter()/fpu_kern_leave(): stashes the
 * previous pcb_save pointer plus an (unaligned) backing store that
 * fpu_kern_ctx_savefpu() rounds up to XSAVE_AREA_ALIGN.
 */
struct fpu_kern_ctx {
	union savefpu *prev;	/* pcb_save to restore on leave */
	uint32_t flags;		/* FPU_KERN_CTX_* */
	char hwstate1[];	/* backing store for the save area */
};
1018
1019struct fpu_kern_ctx *
1020fpu_kern_alloc_ctx(u_int flags)
1021{
1022 struct fpu_kern_ctx *res;
1023 size_t sz;
1024
1025 sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
1026 sizeof(union savefpu);
1027 res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
1028 M_NOWAIT : M_WAITOK) | M_ZERO);
1029 return (res);
1030}
1031
/* Release a context obtained from fpu_kern_alloc_ctx(). */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	/* XXXKIB clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}
1039
1040static union savefpu *
1041fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
1042{
1043 vm_offset_t p;
1044
1045 p = (vm_offset_t)&ctx->hwstate1;
1046 p = roundup2(p, XSAVE_AREA_ALIGN);
1047 return ((union savefpu *)p);
1048}
1049
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	/* Dedicated FPU kthreads need no save; mark the ctx a dummy. */
	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY;
		return (0);
	}
	pcb = td->td_pcb;
	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
	    ("mangled pcb_save"));
	ctx->flags = 0;
	/* Remember whether the outgoing state was initialized. */
	if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
		ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
	npxexit(td);
	/* Redirect pcb_save to the context's aligned save area. */
	ctx->prev = pcb->pcb_save;
	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
	pcb->pcb_flags |= PCB_KERNNPX;
	pcb->pcb_flags &= ~PCB_NPXINITDONE;
	return (0);
}
1072
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	/* Dummy contexts (FPU kthreads) saved nothing; nothing to undo. */
	if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
		return (0);
	pcb = td->td_pcb;
	critical_enter();
	if (curthread == PCPU_GET(fpcurthread))
		npxdrop();
	critical_exit();
	/* Restore the saved pcb_save pointer and recompute the flags. */
	pcb->pcb_save = ctx->prev;
	if (pcb->pcb_save == &pcb->pcb_user_save) {
		if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		pcb->pcb_flags &= ~PCB_KERNNPX;
	} else {
		if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
	}
	return (0);
}
1101
1102int
1103fpu_kern_thread(u_int flags)
1104{
1105 struct pcb *pcb;
1106
1107 pcb = curpcb;
1108 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
1109 ("Only kthread may use fpu_kern_thread"));
1110 KASSERT(curpcb->pcb_save == &curpcb->pcb_user_save,
1111 ("mangled pcb_save"));
1112 KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
1113
1114 curpcb->pcb_flags |= PCB_KERNNPX;
1115 return (0);
1116}
1117
1118int
1119is_fpu_kern_thread(u_int flags)
1120{
1121
1122 if ((curthread->td_pflags & TDP_KTHREAD) == 0)
1123 return (0);
1124 return ((curpcb->pcb_flags & PCB_KERNNPX) != 0);
1125}
802npxdrop()
803{
804 struct thread *td;
805
806 /*
807 * Discard pending exceptions in the !cpu_fxsr case so that unmasked
808 * ones don't cause a panic on the next frstor.
809 */
810#ifdef CPU_ENABLE_SSE
811 if (!cpu_fxsr)
812#endif
813 fnclex();
814
815 td = PCPU_GET(fpcurthread);
816 KASSERT(td == curthread, ("fpudrop: fpcurthread != curthread"));
817 CRITICAL_ASSERT(td);
818 PCPU_SET(fpcurthread, NULL);
819 td->td_pcb->pcb_flags &= ~PCB_NPXINITDONE;
820 start_emulating();
821}
822
823/*
824 * Get the user state of the FPU into pcb->pcb_user_save without
825 * dropping ownership (if possible). It returns the FPU ownership
826 * status.
827 */
828int
829npxgetregs(struct thread *td)
830{
831 struct pcb *pcb;
832
833 if (!hw_float)
834 return (_MC_FPOWNED_NONE);
835
836 pcb = td->td_pcb;
837 if ((pcb->pcb_flags & PCB_NPXINITDONE) == 0) {
838 bcopy(&npx_initialstate, &pcb->pcb_user_save,
839 sizeof(npx_initialstate));
840 SET_FPU_CW(&pcb->pcb_user_save, pcb->pcb_initial_npxcw);
841 npxuserinited(td);
842 return (_MC_FPOWNED_PCB);
843 }
844 critical_enter();
845 if (td == PCPU_GET(fpcurthread)) {
846 fpusave(&pcb->pcb_user_save);
847#ifdef CPU_ENABLE_SSE
848 if (!cpu_fxsr)
849#endif
850 /*
851 * fnsave initializes the FPU and destroys whatever
852 * context it contains. Make sure the FPU owner
853 * starts with a clean state next time.
854 */
855 npxdrop();
856 critical_exit();
857 return (_MC_FPOWNED_FPU);
858 } else {
859 critical_exit();
860 return (_MC_FPOWNED_PCB);
861 }
862}
863
864void
865npxuserinited(struct thread *td)
866{
867 struct pcb *pcb;
868
869 pcb = td->td_pcb;
870 if (PCB_USER_FPU(pcb))
871 pcb->pcb_flags |= PCB_NPXINITDONE;
872 pcb->pcb_flags |= PCB_NPXUSERINITDONE;
873}
874
875
876void
877npxsetregs(struct thread *td, union savefpu *addr)
878{
879 struct pcb *pcb;
880
881 if (!hw_float)
882 return;
883
884 pcb = td->td_pcb;
885 critical_enter();
886 if (td == PCPU_GET(fpcurthread) && PCB_USER_FPU(pcb)) {
887#ifdef CPU_ENABLE_SSE
888 if (!cpu_fxsr)
889#endif
890 fnclex(); /* As in npxdrop(). */
891 if (((uintptr_t)addr & 0xf) != 0) {
892 bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
893 fpurstor(&pcb->pcb_user_save);
894 } else
895 fpurstor(addr);
896 critical_exit();
897 pcb->pcb_flags |= PCB_NPXUSERINITDONE | PCB_NPXINITDONE;
898 } else {
899 critical_exit();
900 bcopy(addr, &pcb->pcb_user_save, sizeof(*addr));
901 npxuserinited(td);
902 }
903}
904
/*
 * Save the hardware FPU state into *addr: fxsave when SSE is enabled,
 * fnsave otherwise.  (Converted from a K&R-style definition to an ANSI
 * prototype for consistency with the other definitions in this file.)
 */
static void
fpusave(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fxsave(addr);
	else
#endif
		fnsave(addr);
}
917
#ifdef CPU_ENABLE_SSE
/*
 * On AuthenticAMD processors, the fxrstor instruction does not restore
 * the x87's stored last instruction pointer, last data pointer, and last
 * opcode values, except in the rare case in which the exception summary
 * (ES) bit in the x87 status word is set to 1.
 *
 * In order to avoid leaking this information across processes, we clean
 * these values by performing a dummy load before executing fxrstor().
 */
static void
fpu_clean_state(void)
{
	static float dummy_variable = 0.0;
	u_short status;

	/*
	 * Clear the ES bit in the x87 status word if it is currently
	 * set, in order to avoid causing a fault in the upcoming load.
	 * 0x80 is the ES (exception summary) bit of the status word.
	 */
	fnstsw(&status);
	if (status & 0x80)
		fnclex();

	/*
	 * Load the dummy variable into the x87 stack.  This mangles
	 * the x87 stack, but we don't care since we're about to call
	 * fxrstor() anyway.  st(7) is freed first so the flds cannot
	 * fault on a full register stack.
	 */
	__asm __volatile("ffree %%st(7); flds %0" : : "m" (dummy_variable));
}
#endif /* CPU_ENABLE_SSE */
950
/*
 * Restore the hardware FPU state from *addr: fxrstor when the CPU has
 * FXSR support, legacy frstor otherwise.  For the fxrstor case the
 * operand must be 16-byte aligned; see the staging copy in
 * npxsetregs().
 *
 * Converted from an old-style (K&R) definition to an ANSI prototype
 * definition for consistency with the rest of the file and to get
 * compile-time argument checking.
 */
static void
fpurstor(union savefpu *addr)
{

#ifdef CPU_ENABLE_SSE
	if (cpu_fxsr)
		fxrstor(addr);
	else
#endif
		frstor(addr);
}
963
/*
 * newbus method table for the npx (x87 coprocessor) pseudo-device.
 * Probe/attach are local; the remaining methods are generic pass-
 * throughs.
 */
static device_method_t npx_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify, npx_identify),
	DEVMETHOD(device_probe, npx_probe),
	DEVMETHOD(device_attach, npx_attach),
	DEVMETHOD(device_detach, bus_generic_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),

	{ 0, 0 }
};
976
/* Driver declaration and registration for the npx device. */
static driver_t npx_driver = {
	"npx",
	npx_methods,
	1,	/* no softc */
};

static devclass_t npx_devclass;

/*
 * We prefer to attach to the root nexus so that the usual case (exception 16)
 * doesn't describe the processor as being `on isa'.
 */
DRIVER_MODULE(npx, nexus, npx_driver, npx_devclass, 0, 0);
990
#ifdef DEV_ISA
/*
 * This sucks up the legacy ISA support assignments from PNPBIOS/ACPI.
 */
static struct isa_pnp_id npxisa_ids[] = {
	{ 0x040cd041, "Legacy ISA coprocessor support" }, /* PNP0C04 */
	{ 0 }	/* table terminator */
};
999
1000static int
1001npxisa_probe(device_t dev)
1002{
1003 int result;
1004 if ((result = ISA_PNP_PROBE(device_get_parent(dev), dev, npxisa_ids)) <= 0) {
1005 device_quiet(dev);
1006 }
1007 return(result);
1008}
1009
/*
 * Nothing to do on attach; the device exists only to claim the legacy
 * PNP resource assignments (see npxisa_ids above).
 */
static int
npxisa_attach(device_t dev)
{
	return (0);
}
1015
/* newbus method table for the legacy-ISA npx placeholder device. */
static device_method_t npxisa_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, npxisa_probe),
	DEVMETHOD(device_attach, npxisa_attach),
	DEVMETHOD(device_detach, bus_generic_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),

	{ 0, 0 }
};
/* Registration: attach npxisa on both the isa and (non-PC98) acpi buses. */
static driver_t npxisa_driver = {
	"npxisa",
	npxisa_methods,
	1,	/* no softc */
};

static devclass_t npxisa_devclass;

DRIVER_MODULE(npxisa, isa, npxisa_driver, npxisa_devclass, 0, 0);
#ifndef PC98
DRIVER_MODULE(npxisa, acpi, npxisa_driver, npxisa_devclass, 0, 0);
#endif
#endif /* DEV_ISA */
1041
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for FPU state");

/* Alignment required of the save area carved out of hwstate1[]. */
#define XSAVE_AREA_ALIGN 64

/* fpu_kern_ctx flags. */
#define	FPU_KERN_CTX_NPXINITDONE 0x01	/* prev ctx had PCB_NPXINITDONE */
#define	FPU_KERN_CTX_DUMMY	 0x02	/* no-op ctx for FPU kern threads */

/*
 * Per-use kernel FPU context.  hwstate1[] holds unaligned backing
 * storage; fpu_kern_ctx_savefpu() returns the aligned save area
 * within it.
 */
struct fpu_kern_ctx {
	union savefpu *prev;	/* pcb_save to restore in fpu_kern_leave() */
	uint32_t flags;		/* FPU_KERN_CTX_* */
	char hwstate1[];
};
1055
1056struct fpu_kern_ctx *
1057fpu_kern_alloc_ctx(u_int flags)
1058{
1059 struct fpu_kern_ctx *res;
1060 size_t sz;
1061
1062 sz = sizeof(struct fpu_kern_ctx) + XSAVE_AREA_ALIGN +
1063 sizeof(union savefpu);
1064 res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
1065 M_NOWAIT : M_WAITOK) | M_ZERO);
1066 return (res);
1067}
1068
/*
 * Release a context obtained from fpu_kern_alloc_ctx().
 */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	/* XXXKIB clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}
1076
1077static union savefpu *
1078fpu_kern_ctx_savefpu(struct fpu_kern_ctx *ctx)
1079{
1080 vm_offset_t p;
1081
1082 p = (vm_offset_t)&ctx->hwstate1;
1083 p = roundup2(p, XSAVE_AREA_ALIGN);
1084 return ((union savefpu *)p);
1085}
1086
/*
 * Switch td's FPU save area to the kernel-private one embedded in
 * *ctx, so kernel code may use the FPU without clobbering the user
 * state.  Must be paired with fpu_kern_leave().  Always returns 0.
 */
int
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	/*
	 * Dedicated FPU kernel threads (see fpu_kern_thread()) own the
	 * FPU outright; mark the context as a dummy so fpu_kern_leave()
	 * does nothing.
	 */
	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY;
		return (0);
	}
	pcb = td->td_pcb;
	KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == &pcb->pcb_user_save,
	    ("mangled pcb_save"));
	ctx->flags = 0;
	/* Remember whether the outgoing context was initialized. */
	if ((pcb->pcb_flags & PCB_NPXINITDONE) != 0)
		ctx->flags |= FPU_KERN_CTX_NPXINITDONE;
	/* NOTE(review): npxexit() presumably gives up the hardware FPU
	 * for this thread — its definition is outside this chunk. */
	npxexit(td);
	/* Redirect saves/restores to the aligned area inside ctx. */
	ctx->prev = pcb->pcb_save;
	pcb->pcb_save = fpu_kern_ctx_savefpu(ctx);
	pcb->pcb_flags |= PCB_KERNNPX;
	pcb->pcb_flags &= ~PCB_NPXINITDONE;
	return (0);
}
1109
/*
 * Undo fpu_kern_enter(): drop any live kernel FPU state and point
 * pcb_save back at the area recorded in *ctx.  Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	/* A dummy context (FPU kernel thread) needs no restore work. */
	if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
		return (0);
	pcb = td->td_pcb;
	critical_enter();
	if (curthread == PCPU_GET(fpcurthread))
		npxdrop();	/* discard kernel-only hardware state */
	critical_exit();
	pcb->pcb_save = ctx->prev;
	if (pcb->pcb_save == &pcb->pcb_user_save) {
		/* Outermost leave: re-derive init state from user flag. */
		if ((pcb->pcb_flags & PCB_NPXUSERINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		pcb->pcb_flags &= ~PCB_KERNNPX;
	} else {
		/* Nested leave: restore the flag fpu_kern_enter() saved. */
		if ((ctx->flags & FPU_KERN_CTX_NPXINITDONE) != 0)
			pcb->pcb_flags |= PCB_NPXINITDONE;
		else
			pcb->pcb_flags &= ~PCB_NPXINITDONE;
		KASSERT(!PCB_USER_FPU(pcb), ("unpaired fpu_kern_leave"));
	}
	return (0);
}
1138
1139int
1140fpu_kern_thread(u_int flags)
1141{
1142 struct pcb *pcb;
1143
1144 pcb = curpcb;
1145 KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
1146 ("Only kthread may use fpu_kern_thread"));
1147 KASSERT(curpcb->pcb_save == &curpcb->pcb_user_save,
1148 ("mangled pcb_save"));
1149 KASSERT(PCB_USER_FPU(curpcb), ("recursive call"));
1150
1151 curpcb->pcb_flags |= PCB_KERNNPX;
1152 return (0);
1153}
1154
1155int
1156is_fpu_kern_thread(u_int flags)
1157{
1158
1159 if ((curthread->td_pflags & TDP_KTHREAD) == 0)
1160 return (0);
1161 return ((curpcb->pcb_flags & PCB_KERNNPX) != 0);
1162}