/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1992-1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */


#include <mach/exception_types.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/fp_reg.h>
#include <mach/branch_predicates.h>

#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/assert.h>

#include <libkern/OSAtomic.h>

#include <architecture/i386/pio.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/proc_reg.h>
#include <i386/misc_protos.h>
#include <i386/thread.h>
#include <i386/trap.h>

int		fp_kind = FP_NO;	/* not inited */
zone_t		ifps_zone;		/* zone for FPU save area */

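/* True iff 'addr' is aligned on a 'size'-byte boundary; 'size' must be a power of two. */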
#define ALIGNED(addr,size)	(((uintptr_t)(addr)&((size)-1))==0)

/* Forward */

extern void		fpinit(void);
extern void		fp_save(
				thread_t	thr_act);
extern void		fp_load(
				thread_t	thr_act);

static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps);

struct x86_avx_thread_state initial_fp_state __attribute((aligned(64)));


/* Global MXCSR capability bitmask */
static unsigned int mxcsr_capability_mask;

#define	fninit() \
	__asm__ volatile("fninit")

#define	fnstcw(control) \
	__asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))

#define	fldcw(control) \
	__asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) )

#define	fnclex() \
	__asm__ volatile("fnclex")

#define	fnsave(state)  \
	__asm__ volatile("fnsave %0" : "=m" (*state))

#define	frstor(state) \
	__asm__ volatile("frstor %0" : : "m" (state))

#define	fwait() \
	__asm__("fwait")

#define fxrstor(addr)           __asm__ __volatile__("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr)            __asm__ __volatile__("fxsave %0" : "=m" (*(addr)))
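
/*
 * Note: fxsave/fxrstor require a 16-byte-aligned operand, and the
 * xsave/xrstor forms below require 64-byte alignment, which is why save
 * areas are allocated and asserted 64-byte aligned throughout this file.
 */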

static uint32_t	fp_register_state_size = 0;
static uint32_t fpu_YMM_present	= FALSE;
static uint32_t	cpuid_reevaluated = 0;

static void fpu_store_registers(void *, boolean_t);
static void fpu_load_registers(void *);

extern	void xsave64o(void);
extern	void xrstor64o(void);

#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM))
static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) {
	__asm__ __volatile__("xsetbv" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0));
}

static inline void xsave(struct x86_fx_thread_state *a) {
	__asm__ __volatile__("xsave %0" :"=m" (*a) : "a"(XMASK), "d"(0));
}

static inline void xrstor(struct x86_fx_thread_state *a) {
	__asm__ __volatile__("xrstor %0" :: "m" (*a), "a"(XMASK), "d"(0));
}

#if	DEBUG
static inline unsigned short
fnstsw(void)
{
	unsigned short status;
	__asm__ volatile("fnstsw %0" : "=ma" (status));
	return(status);
}
#endif

/*
 * Configure the initial FPU state presented to new threads.
 * Determine the MXCSR capability mask, which allows us to mask off any
 * potentially unsafe "reserved" bits before restoring the FPU context.
 * *Not* per-cpu, assumes symmetry.
 */

static void
configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps)
{
	/* XSAVE requires a 64 byte aligned store */
	assert(ALIGNED(fps, 64));
	/* Clear, to prepare for the diagnostic FXSAVE */
	bzero(fps, sizeof(*fps));

	fpinit();
	fpu_store_registers(fps, FALSE);

	mxcsr_capability_mask = fps->fx_MXCSR_MASK;

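	/* Per the Intel SDM, a zero MXCSR_MASK in the save image means the
	 * default mask, 0xFFBF (bit 6, DAZ, unsupported), applies.
	 */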
	/* Set default mask value if necessary */
	if (mxcsr_capability_mask == 0)
		mxcsr_capability_mask = 0xffbf;

	/* Clear vector register store */
	bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg));
	bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg));

	fps->fp_valid = TRUE;
	fps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
	fpu_load_registers(fps);

	/* Poison values to trap unsafe usage */
	fps->fp_valid = 0xFFFFFFFF;
	fps->fp_save_layout = FP_UNUSED;

	/* Re-enable FPU/SSE DNA exceptions */
	set_ts();
}


/*
 * Look for FPU and initialize it.
 * Called on each CPU.
 */
void
init_fpu(void)
{
#if	DEBUG
	unsigned short	status;
	unsigned short	control;
#endif
	/*
	 * Check for FPU by initializing it,
	 * then trying to read the correct bit patterns from
	 * the control and status registers.
	 */
	set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE);	/* allow use of FPU */
	fninit();
#if	DEBUG
	status = fnstsw();
	fnstcw(&control);

	assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f));
#endif
	/* Advertise SSE support */
	if (cpuid_features() & CPUID_FEATURE_FXSR) {
		fp_kind = FP_FXSR;
		set_cr4(get_cr4() | CR4_OSFXS);
		/* And allow SIMD exceptions if present */
		if (cpuid_features() & CPUID_FEATURE_SSE) {
			set_cr4(get_cr4() | CR4_OSXMM);
		}
		fp_register_state_size = sizeof(struct x86_fx_thread_state);

	} else
		panic("fpu is not FP_FXSR");

	/* Configure the XSAVE context mechanism if the processor supports
	 * AVX/YMM registers
	 */
	if (cpuid_features() & CPUID_FEATURE_XSAVE) {
		cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
		if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
			assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE);
			/* XSAVE container size for all features */
			assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state));
			fp_register_state_size = sizeof(struct x86_avx_thread_state);
			fpu_YMM_present = TRUE;
			set_cr4(get_cr4() | CR4_OSXSAVE);
			xsetbv(0, XMASK);
			/* Re-evaluate CPUID, once, to reflect OSXSAVE */
			if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
				cpuid_set_info();
			/* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
		}
	}
	else
		fpu_YMM_present = FALSE;

	fpinit();

	/*
	 * Trap wait instructions.  Turn off FPU for now.
	 */
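	/*
	 * With CR0.MP set, WAIT/FWAIT honors CR0.TS; with CR0.TS set, the
	 * next FP/SSE instruction takes a #NM fault, handled by
	 * fpnoextflt() below, which lazily allocates and activates the
	 * faulting thread's FPU state.
	 */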
	set_cr0(get_cr0() | CR0_TS | CR0_MP);
}

/*
 * Allocate and initialize FP state for current thread.
 * Don't load state.
 */
static void *
fp_state_alloc(void)
{
	struct x86_fx_thread_state *ifps = zalloc(ifps_zone);

#if	DEBUG
	if (!(ALIGNED(ifps,64))) {
		panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size);
	}
#endif
	bzero(ifps, sizeof(*ifps));
	return ifps;
}

static inline void
fp_state_free(void *ifps)
{
	zfree(ifps_zone, ifps);
}

void clear_fpu(void)
{
	set_ts();
}


static void fpu_load_registers(void *fstate) {
	struct x86_fx_thread_state *ifps = fstate;
	fp_save_layout_t layout = ifps->fp_save_layout;

	assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64);
	assert(ALIGNED(ifps, 64));
	assert(ml_get_interrupts_enabled() == FALSE);

#if	DEBUG
	if (layout == XSAVE32 || layout == XSAVE64) {
		struct x86_avx_thread_state *iavx = fstate;
		unsigned i;
		/* Verify reserved bits in the XSAVE header */
		if (iavx->_xh.xsbv & ~7)
			panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv);
		for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++)
			if (iavx->_xh.xhrsvd[i])
				panic("Reserved bit set");
	}
	if (fpu_YMM_present) {
		if (layout != XSAVE32 && layout != XSAVE64)
			panic("Inappropriate layout: %u\n", layout);
	}
#endif	/* DEBUG */

	if ((layout == XSAVE64) || (layout == XSAVE32))
		xrstor(ifps);
	else
		fxrstor(ifps);
}

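/*
 * The layout tag records the bitness of the saved context for later
 * consumers such as context restore and signal delivery; the register
 * image itself is written by the same xsave/fxsave form in either case.
 */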
static void fpu_store_registers(void *fstate, boolean_t is64) {
	struct x86_fx_thread_state *ifps = fstate;
	assert(ALIGNED(ifps, 64));
	if (fpu_YMM_present) {
		xsave(ifps);
		ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32;
	}
	else {
		fxsave(ifps);
		ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32;
	}
}

/*
 * Initialize FP handling.
 */

void
fpu_module_init(void)
{
	if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) &&
	    (fp_register_state_size != sizeof(struct x86_avx_thread_state)))
		panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size);

	assert(fpu_YMM_present != 0xFFFFFFFF);

	/* We explicitly choose an allocation size of 64 save areas to
	 * eliminate waste for the 832-byte AVX XSAVE register save area.
	 */
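	/* Both save-area sizes (512-byte fxsave, 832-byte xsave) are
	 * multiples of 64, so, assuming zone chunks are page aligned,
	 * densely packed elements keep the required 64-byte alignment.
	 */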
	ifps_zone = zinit(fp_register_state_size,
			  thread_max * fp_register_state_size,
			  64 * fp_register_state_size,
			  "x86 fpsave state");

	/* To maintain the required alignment, disable
	 * zone debugging for this zone as that appends
	 * 16 bytes to each element.
	 */
	zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE);
	/* Determine MXCSR reserved bits and configure initial FPU state */
	configure_mxcsr_capability_mask(&initial_fp_state);
}

/*
 * Save thread's FPU context.
 */
void
fpu_save_context(thread_t thread)
{
	struct x86_fx_thread_state *ifps;

	assert(ml_get_interrupts_enabled() == FALSE);
	ifps = (thread)->machine.ifps;
#if	DEBUG
	if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
		panic("ifps->fp_valid: %u\n", ifps->fp_valid);
	}
#endif
	if (ifps != 0 && (ifps->fp_valid == FALSE)) {
		/* Clear CR0.TS in preparation for the FP context save. In
		 * theory, this shouldn't be necessary since a live FPU should
		 * indicate that TS is clear. However, various routines
		 * (such as sendsig & sigreturn) manipulate TS directly.
		 */
		clear_ts();
		/* registers are in FPU - save to memory */
		fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss)));
		ifps->fp_valid = TRUE;
	}
	set_ts();
}


/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 */
void
fpu_free(void *fps)
{
	fp_state_free(fps);
}

/*
 * Set the floating-point state for a thread based
 * on the FXSave formatted data. This is basically
 * the same as fpu_set_state except it uses the
 * expanded data structure.
 * If the thread is not the current thread, it is
 * not running (held).  Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_set_fxstate(
	thread_t	thr_act,
	thread_state_t	tstate,
	thread_flavor_t f)
{
	struct x86_fx_thread_state *ifps;
	struct x86_fx_thread_state *new_ifps;
	x86_float_state64_t	*state;
	pcb_t	pcb;
	size_t	state_size = sizeof(struct x86_fx_thread_state);
	boolean_t	old_valid, fresh_state = FALSE;

	if (fp_kind == FP_NO)
		return KERN_FAILURE;

	if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
	    !ml_fpu_avx_enabled())
		return KERN_FAILURE;

	state = (x86_float_state64_t *)tstate;

	assert(thr_act != THREAD_NULL);
	pcb = THREAD_TO_PCB(thr_act);

	if (state == NULL) {
		/*
		 * new FPU state is 'invalid'.
		 * Deallocate the fp state if it exists.
		 */
		simple_lock(&pcb->lock);

		ifps = pcb->ifps;
		pcb->ifps = 0;

		simple_unlock(&pcb->lock);

		if (ifps != 0) {
			fp_state_free(ifps);
		}
	} else {
		/*
		 * Valid incoming state. Allocate the fp state if there is none.
		 */
		new_ifps = 0;
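		/*
		 * Allocation may block, so the pcb lock is dropped around
		 * fp_state_alloc() and the lookup retried: another thread
		 * may have installed a save area in the interim.
		 */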
	Retry:
		simple_lock(&pcb->lock);

		ifps = pcb->ifps;
		if (ifps == 0) {
			if (new_ifps == 0) {
				simple_unlock(&pcb->lock);
				new_ifps = fp_state_alloc();
				goto Retry;
			}
			ifps = new_ifps;
			new_ifps = 0;
			pcb->ifps = ifps;
			fresh_state = TRUE;
		}

		/*
		 * now copy over the new data.
		 */

		old_valid = ifps->fp_valid;

#if	DEBUG || DEVELOPMENT
		if ((fresh_state == FALSE) && (old_valid == FALSE) && (thr_act != current_thread())) {
			panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
		}
#endif
		/*
		 * Clear any reserved bits in the MXCSR to prevent a GPF
		 * when issuing an FXRSTOR.
		 */

		state->fpu_mxcsr &= mxcsr_capability_mask;

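		/*
		 * The save image begins with the control word, so the copy
		 * starts at fpu_fcw, skipping the user structure's leading
		 * reserved fields.
		 */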
		bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);

		if (fpu_YMM_present) {
			struct x86_avx_thread_state *iavx = (void *) ifps;
			uint32_t fpu_nyreg = 0;

			if (f == x86_AVX_STATE32)
				fpu_nyreg = 8;
			else if (f == x86_AVX_STATE64)
				fpu_nyreg = 16;

			if (fpu_nyreg) {
				x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
				bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG));
			}

			iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
			/* Sanitize XSAVE header */
			bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
			if (fpu_nyreg)
				iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87);
			else
				iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87);
		} else {
			ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
		}
		ifps->fp_valid = old_valid;

		if (old_valid == FALSE) {
			boolean_t istate = ml_set_interrupts_enabled(FALSE);
			ifps->fp_valid = TRUE;
			/* If altering the current thread's state, disable FPU */
			if (thr_act == current_thread())
				set_ts();

			ml_set_interrupts_enabled(istate);
		}

		simple_unlock(&pcb->lock);

		if (new_ifps != 0)
			fp_state_free(new_ifps);
	}
	return KERN_SUCCESS;
}

/*
 * Get the floating-point state for a thread.
 * If the thread is not the current thread, it is
 * not running (held).  Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_get_fxstate(
	thread_t	thr_act,
	thread_state_t	tstate,
	thread_flavor_t f)
{
	struct x86_fx_thread_state	*ifps;
	x86_float_state64_t	*state;
	kern_return_t	ret = KERN_FAILURE;
	pcb_t	pcb;
	size_t	state_size = sizeof(struct x86_fx_thread_state);

	if (fp_kind == FP_NO)
		return KERN_FAILURE;

	if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
	    !ml_fpu_avx_enabled())
		return KERN_FAILURE;

	state = (x86_float_state64_t *)tstate;

	assert(thr_act != THREAD_NULL);
	pcb = THREAD_TO_PCB(thr_act);

	simple_lock(&pcb->lock);

	ifps = pcb->ifps;
	if (ifps == 0) {
		/*
		 * No valid floating-point state.
		 */

		bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw,
		    state_size);

		simple_unlock(&pcb->lock);

		return KERN_SUCCESS;
	}
	/*
	 * Make sure we've got the latest fp state info: if the live FPU
	 * state belongs to our target, save it first.
	 */
	if (thr_act == current_thread()) {
		boolean_t	intr;

		intr = ml_set_interrupts_enabled(FALSE);

		clear_ts();
		fp_save(thr_act);
		clear_fpu();

		(void)ml_set_interrupts_enabled(intr);
	}
	if (ifps->fp_valid) {
		bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
		if (fpu_YMM_present) {
			struct x86_avx_thread_state *iavx = (void *) ifps;
			uint32_t fpu_nyreg = 0;

			if (f == x86_AVX_STATE32)
				fpu_nyreg = 8;
			else if (f == x86_AVX_STATE64)
				fpu_nyreg = 16;

			if (fpu_nyreg) {
				x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
				bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG));
			}
		}

		ret = KERN_SUCCESS;
	}
	simple_unlock(&pcb->lock);

	return ret;
}



/*
 * the child thread is 'stopped' with the thread
 * mutex held and is currently not known by anyone
 * so no way for fpu state to get manipulated by an
 * outside agency -> no need for pcb lock
 */

void
fpu_dup_fxstate(
	thread_t	parent,
	thread_t	child)
{
	struct x86_fx_thread_state *new_ifps = NULL;
	boolean_t	intr;
	pcb_t		ppcb;

	ppcb = THREAD_TO_PCB(parent);

	if (ppcb->ifps == NULL)
		return;

	if (child->machine.ifps)
		panic("fpu_dup_fxstate: child's ifps non-null");

	new_ifps = fp_state_alloc();

	simple_lock(&ppcb->lock);

	if (ppcb->ifps != NULL) {
		struct x86_fx_thread_state *ifps = ppcb->ifps;
		/*
		 * Make sure we've got the latest fp state info
		 */
		intr = ml_set_interrupts_enabled(FALSE);
		assert(current_thread() == parent);
		clear_ts();
		fp_save(parent);
		clear_fpu();

		(void)ml_set_interrupts_enabled(intr);

		if (ifps->fp_valid) {
			child->machine.ifps = new_ifps;
			assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
			    (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
			bcopy((char *)(ppcb->ifps),
			    (char *)(child->machine.ifps), fp_register_state_size);

			/* Mark the new fp saved state as non-live. */
			/* Temporarily disabled: radar 4647827
			 * new_ifps->fp_valid = TRUE;
			 */

			/*
			 * Clear any reserved bits in the MXCSR to prevent a GPF
			 * when issuing an FXRSTOR.
			 */
			new_ifps->fx_MXCSR &= mxcsr_capability_mask;
			new_ifps = NULL;
		}
	}
	simple_unlock(&ppcb->lock);

	if (new_ifps != NULL)
		fp_state_free(new_ifps);
}


/*
 * Initialize FPU.
 */

void
fpinit(void)
{
	unsigned short	control;

	clear_ts();
	fninit();
	fnstcw(&control);
	control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
	control |= (FPC_PC_64 |		/* Set precision */
			FPC_RC_RN |	/* round-to-nearest */
			FPC_ZE |	/* Suppress zero-divide */
			FPC_OE |	/*  and overflow */
			FPC_UE |	/*  underflow */
			FPC_IE |	/* Allow NaNQs and +-INF */
			FPC_DE |	/* Allow denorms as operands */
			FPC_PE);	/* No trap for precision loss */
	fldcw(control);

	/* Initialize SSE/SSE2 */
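	/* 0x1f80 is the MXCSR power-on default: all SIMD exceptions
	 * masked, round-to-nearest, and all status flags clear. */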
	__builtin_ia32_ldmxcsr(0x1f80);
}

/*
 * Coprocessor not present: handle a device-not-available (#NM) fault by
 * lazily allocating and activating FPU state for the current thread.
 */

void
fpnoextflt(void)
{
	boolean_t	intr;
	thread_t	thr_act;
	pcb_t		pcb;
	struct x86_fx_thread_state *ifps = 0;

	thr_act = current_thread();
	pcb = THREAD_TO_PCB(thr_act);

	assert(fp_register_state_size != 0);

	if (pcb->ifps == 0 && !get_interrupt_level()) {
		ifps = fp_state_alloc();
		bcopy((char *)&initial_fp_state, (char *)ifps,
		    fp_register_state_size);
		if (!thread_is_64bit(thr_act)) {
			ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
		}
		else
			ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64;
		ifps->fp_valid = TRUE;
	}
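	/*
	 * If the fault was taken at interrupt level, no save area was
	 * allocated above (allocation may block); the interrupted context
	 * is saved and the interrupt handler runs on a freshly initialized
	 * live FPU.
	 */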
	intr = ml_set_interrupts_enabled(FALSE);

	clear_ts();			/* Enable FPU use */

	if (__improbable(get_interrupt_level())) {
		/*
		 * Save current coprocessor context if valid
		 * Initialize coprocessor live context
		 */
		fp_save(thr_act);
		fpinit();
	} else {
		if (pcb->ifps == 0) {
			pcb->ifps = ifps;
			ifps = 0;
		}
		/*
		 * Load this thread's state into coprocessor live context.
		 */
		fp_load(thr_act);
	}
	(void)ml_set_interrupts_enabled(intr);

	if (ifps)
		fp_state_free(ifps);
}

/*
 * FPU overran end of segment.
 * Re-initialize FPU.  Floating point state is not valid.
 */

void
fpextovrflt(void)
{
	thread_t	thr_act = current_thread();
	pcb_t		pcb;
	struct x86_fx_thread_state *ifps;
	boolean_t	intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("FPU segment overrun exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("FPU segment overrun exception in kernel thread context\n");

	/*
	 * This is a non-recoverable error.
	 * Invalidate the thread's FPU state.
	 */
	pcb = THREAD_TO_PCB(thr_act);
	simple_lock(&pcb->lock);
	ifps = pcb->ifps;
	pcb->ifps = 0;
	simple_unlock(&pcb->lock);

	/*
	 * Re-initialize the FPU.
	 */
	clear_ts();
	fninit();

	/*
	 * And disable access.
	 */
	clear_fpu();

	(void)ml_set_interrupts_enabled(intr);

	if (ifps)
		zfree(ifps_zone, ifps);

	/*
	 * Raise exception.
	 */
	i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
	/*NOTREACHED*/
}

/*
 * FPU error. Called by AST.
 */

void
fpexterrflt(void)
{
	thread_t	thr_act = current_thread();
	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
	boolean_t	intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("FPU error exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("FPU error exception in kernel thread context\n");

	/*
	 * Save the FPU state and turn off the FPU.
	 */
	fp_save(thr_act);

	(void)ml_set_interrupts_enabled(intr);

	/*
	 * Raise FPU exception.
	 * Locking not needed on pcb->ifps,
	 * since thread is running.
	 */
	i386_exception(EXC_ARITHMETIC,
		       EXC_I386_EXTERR,
		       ifps->fx_status);

	/*NOTREACHED*/
}

/*
 * Save FPU state.
 *
 * Locking not needed:
 * .	if called from fpu_get_state, pcb already locked.
 * .	if called from fpnoextflt or fp_intr, we are single-cpu
 * .	otherwise, thread is running.
 * N.B.: Must be called with interrupts disabled
 */

void
fp_save(
	thread_t	thr_act)
{
	pcb_t pcb = THREAD_TO_PCB(thr_act);
	struct x86_fx_thread_state *ifps = pcb->ifps;

	assert(ifps != 0);
	if (ifps != 0 && !ifps->fp_valid) {
		assert((get_cr0() & CR0_TS) == 0);
		/* registers are in FPU */
		ifps->fp_valid = TRUE;
		fpu_store_registers(ifps, thread_is_64bit(thr_act));
	}
}

/*
 * Restore FPU state from PCB.
 *
 * Locking not needed; always called on the current thread.
 */

void
fp_load(
	thread_t	thr_act)
{
	pcb_t pcb = THREAD_TO_PCB(thr_act);
	struct x86_fx_thread_state *ifps = pcb->ifps;

	assert(ifps);
#if	DEBUG
	if (ifps->fp_valid != FALSE && ifps->fp_valid != TRUE) {
		panic("fp_load() invalid fp_valid: %u, fp_save_layout: %u\n",
		      ifps->fp_valid, ifps->fp_save_layout);
	}
#endif

	if (ifps->fp_valid == FALSE) {
		fpinit();
	} else {
		fpu_load_registers(ifps);
	}
	ifps->fp_valid = FALSE;		/* in FPU */
}

/*
 * SSE arithmetic exception handling code.
 * Basically the same as the x87 exception handler with a different subtype
 */

void
fpSSEexterrflt(void)
{
	thread_t	thr_act = current_thread();
	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
	boolean_t	intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("SSE exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("SSE exception in kernel thread context\n");

	/*
	 * Save the FPU state and turn off the FPU.
	 */
	fp_save(thr_act);

	(void)ml_set_interrupts_enabled(intr);
	/*
	 * Raise FPU exception.
	 * Locking not needed on pcb->ifps,
	 * since thread is running.
	 */
	i386_exception(EXC_ARITHMETIC,
		       EXC_I386_SSEEXTERR,
		       ifps->fx_MXCSR);
	/*NOTREACHED*/
}

void
fp_setvalid(boolean_t value) {
	thread_t	thr_act = current_thread();
	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;

	if (ifps) {
		ifps->fp_valid = value;

		if (value == TRUE) {
			boolean_t istate = ml_set_interrupts_enabled(FALSE);
			clear_fpu();
			ml_set_interrupts_enabled(istate);
		}
	}
}

boolean_t
ml_fpu_avx_enabled(void) {
	return (fpu_YMM_present == TRUE);
}