/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1992-1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#include <platforms.h>

#include <mach/exception_types.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/fp_reg.h>
#include <mach/branch_predicates.h>

#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/assert.h>

#include <libkern/OSAtomic.h>

#include <architecture/i386/pio.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/proc_reg.h>
#include <i386/misc_protos.h>
#include <i386/thread.h>
#include <i386/trap.h>

int		fp_kind = FP_NO;	/* not inited */
zone_t		ifps_zone;		/* zone for FPU save area */

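/* Test whether an address is aligned to the given power-of-two size */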
#define ALIGNED(addr,size)	(((uintptr_t)(addr)&((size)-1))==0)

/* Forward */

extern void		fpinit(void);
extern void		fp_save(
				thread_t	thr_act);
extern void		fp_load(
				thread_t	thr_act);

static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps);

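/* Default FP state presented to new threads; 64-byte aligned as
 * required by XSAVE/XRSTOR.
 */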
struct x86_avx_thread_state initial_fp_state __attribute((aligned(64)));


/* Global MXCSR capability bitmask */
static unsigned int mxcsr_capability_mask;

#define	fninit() \
	__asm__ volatile("fninit")

#define	fnstcw(control) \
	__asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))

#define	fldcw(control) \
	__asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) )

#define	fnclex() \
	__asm__ volatile("fnclex")

#define	fnsave(state)  \
	__asm__ volatile("fnsave %0" : "=m" (*state))

#define	frstor(state) \
	__asm__ volatile("frstor %0" : : "m" (state))

#define fwait() \
    	__asm__("fwait");

#define fxrstor(addr)           __asm__ __volatile__("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr)            __asm__ __volatile__("fxsave %0" : "=m" (*(addr)))

static uint32_t	fp_register_state_size = 0;
static uint32_t fpu_YMM_present	= FALSE;
static uint32_t	cpuid_reevaluated = 0;

static void fpu_store_registers(void *, boolean_t);
static void fpu_load_registers(void *);

extern	void xsave64o(void);
extern	void xrstor64o(void);

#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM))

/* DRK: TODO replace opcodes with mnemonics when assembler support available */

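/*
 * The inlines below hand-assemble the XSAVE-family instructions:
 * xsetbv is 0F 01 D1, xsave is 0F AE /4 and xrstor is 0F AE /5.
 * ECX selects the XCR register (for xsetbv) or holds the save area
 * pointer (for the memory forms); EDX:EAX carry the feature mask.
 */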
static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) {
	__asm__ __volatile__(".short 0x010F\n\t.byte 0xD1" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0));
}

static inline void xsave(void *a) {
	/* MOD 0x4, operand ECX 0x1 */
	__asm__ __volatile__(".short 0xAE0F\n\t.byte 0x21" :: "a"(XMASK), "d"(0), "c" (a));
}

static inline void xrstor(void *a) {
	/* MOD 0x5, operand ECX 0x1 */
	__asm__ __volatile__(".short 0xAE0F\n\t.byte 0x29" :: "a"(XMASK), "d"(0), "c" (a));
}

static inline void xsave64(void *a) {
	/* Out of line call that executes in 64-bit mode on K32 */
	__asm__ __volatile__("call _xsave64o" :: "a"(XMASK), "d"(0), "c" (a));
}

static inline void xrstor64(void *a) {
	/* Out of line call that executes in 64-bit mode on K32 */
	__asm__ __volatile__("call _xrstor64o" :: "a"(XMASK), "d"(0), "c" (a));
}

static inline unsigned short
fnstsw(void)
{
	unsigned short status;
	__asm__ volatile("fnstsw %0" : "=ma" (status));
	return(status);
}

/*
 * Configure the initial FPU state presented to new threads.
 * Determine the MXCSR capability mask, which allows us to mask off any
 * potentially unsafe "reserved" bits before restoring the FPU context.
 * *Not* per-cpu, assumes symmetry.
 */

static void
configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps)
{
	/* XSAVE requires a 64 byte aligned store */
	assert(ALIGNED(fps, 64));
	/* Clear, to prepare for the diagnostic FXSAVE */
	bzero(fps, sizeof(*fps));

	fpinit();
	fpu_store_registers(fps, FALSE);

	mxcsr_capability_mask = fps->fx_MXCSR_MASK;

	/* If the CPU reports no mask, fall back to the architectural default (0xffbf, DAZ excluded) */
	if (mxcsr_capability_mask == 0)
		mxcsr_capability_mask = 0xffbf;

	/* Clear vector register store */
	bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg));
	bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg));

	fps->fp_valid = TRUE;
	fps->fp_save_layout = fpu_YMM_present ? XSAVE32: FXSAVE32;
	fpu_load_registers(fps);

	/* Poison values to trap unsafe usage */
	fps->fp_valid = 0xFFFFFFFF;
	fps->fp_save_layout = FP_UNUSED;

	/* Re-enable FPU/SSE DNA exceptions */
	set_ts();
}


/*
 * Look for FPU and initialize it.
 * Called on each CPU.
 */
void
init_fpu(void)
{
#if	DEBUG
	unsigned short	status;
	unsigned short 	control;
#endif
	/*
	 * Check for FPU by initializing it,
	 * then trying to read the correct bit patterns from
	 * the control and status registers.
	 */
	set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE);	/* allow use of FPU */
	fninit();
#if	DEBUG
	status = fnstsw();
	fnstcw(&control);

	assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f));
#endif
	/* Advertise SSE support */
	if (cpuid_features() & CPUID_FEATURE_FXSR) {
		fp_kind = FP_FXSR;
		set_cr4(get_cr4() | CR4_OSFXS);
		/* And allow SIMD exceptions if present */
		if (cpuid_features() & CPUID_FEATURE_SSE) {
			set_cr4(get_cr4() | CR4_OSXMM);
		}
		fp_register_state_size = sizeof(struct x86_fx_thread_state);

	} else
		panic("fpu is not FP_FXSR");

	/* Configure the XSAVE context mechanism if the processor supports
	 * AVX/YMM registers
	 */
	if (cpuid_features() & CPUID_FEATURE_XSAVE) {
		cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
		if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
			assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE);
			/* XSAVE container size for all features */
			assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state));
			fp_register_state_size = sizeof(struct x86_avx_thread_state);
			fpu_YMM_present = TRUE;
			set_cr4(get_cr4() | CR4_OSXSAVE);
			xsetbv(0, XMASK);
			/* Re-evaluate CPUID, once, to reflect OSXSAVE */
			if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
				cpuid_set_info();
			/* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
		}
	}
	else
		fpu_YMM_present = FALSE;

	fpinit();

	/*
	 * Trap wait instructions.  Turn off FPU for now.
	 */
	set_cr0(get_cr0() | CR0_TS | CR0_MP);
}

/*
 * Allocate and initialize FP state for current thread.
 * Don't load state.
 */
static void *
fp_state_alloc(void)
{
	void *ifps = zalloc(ifps_zone);

#if	DEBUG
	if (!(ALIGNED(ifps,64))) {
		panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size);
	}
#endif
	return ifps;
}

static inline void
fp_state_free(void *ifps)
{
	zfree(ifps_zone, ifps);
}

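/*
 * Disable further FPU use by setting CR0.TS; the next FP/SSE instruction
 * executed on this CPU will take a device-not-available fault instead of
 * touching stale register state.
 */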
void clear_fpu(void)
{
	set_ts();
}


static void fpu_load_registers(void *fstate) {
	struct x86_fx_thread_state *ifps = fstate;
	fp_save_layout_t layout = ifps->fp_save_layout;

	assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64);
	assert(ALIGNED(ifps, 64));
	assert(ml_get_interrupts_enabled() == FALSE);

#if	DEBUG
	if (layout == XSAVE32 || layout == XSAVE64) {
		struct x86_avx_thread_state *iavx = fstate;
		unsigned i;
		/* Verify reserved bits in the XSAVE header*/
		if (iavx->_xh.xsbv & ~7)
			panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv);
		for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++)
			if (iavx->_xh.xhrsvd[i])
				panic("Reserved bit set");
	}
	if (fpu_YMM_present) {
		if (layout != XSAVE32 && layout != XSAVE64)
			panic("Inappropriate layout: %u\n", layout);
	}
#endif	/* DEBUG */

#if defined(__i386__)
	if (layout == FXSAVE32) {
		/* Restore the compatibility/legacy mode XMM+x87 state */
		fxrstor(ifps);
	}
	else if (layout == FXSAVE64) {
		fxrstor64(ifps);
	}
	else if (layout == XSAVE32) {
		xrstor(ifps);
	}
	else if (layout == XSAVE64) {
		xrstor64(ifps);
	}
#elif defined(__x86_64__)
	if ((layout == XSAVE64) || (layout == XSAVE32))
		xrstor(ifps);
	else
		fxrstor(ifps);
#endif
}

static void fpu_store_registers(void *fstate, boolean_t is64) {
	struct x86_fx_thread_state *ifps = fstate;
	assert(ALIGNED(ifps, 64));
#if defined(__i386__)
	if (!is64) {
		if (fpu_YMM_present) {
			xsave(ifps);
			ifps->fp_save_layout = XSAVE32;
		}
		else {
			/* save the compatibility/legacy mode XMM+x87 state */
			fxsave(ifps);
			ifps->fp_save_layout = FXSAVE32;
		}
	}
	else {
		if (fpu_YMM_present) {
			xsave64(ifps);
			ifps->fp_save_layout = XSAVE64;
		}
		else {
			fxsave64(ifps);
			ifps->fp_save_layout = FXSAVE64;
		}
	}
#elif defined(__x86_64__)
	if (fpu_YMM_present) {
		xsave(ifps);
		ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32;
	}
	else {
		fxsave(ifps);
		ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32;
	}
#endif
}

/*
 * Initialize FP handling.
 */

void
fpu_module_init(void)
{
	if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) &&
	    (fp_register_state_size != sizeof(struct x86_avx_thread_state)))
		panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size);

	assert(fpu_YMM_present != 0xFFFFFFFF);

	/* We explicitly choose an allocation size of 64 elements
	 * (64 * 832 bytes = exactly 13 pages) to eliminate waste for the
	 * 832 byte sized AVX XSAVE register save area.
	 */
	ifps_zone = zinit(fp_register_state_size,
			  thread_max * fp_register_state_size,
			  64 * fp_register_state_size,
			  "x86 fpsave state");

	/* To maintain the required alignment, disable
	 * zone debugging for this zone as that appends
	 * 16 bytes to each element.
	 */
	zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE);
	/* Determine MXCSR reserved bits and configure initial FPU state*/
	configure_mxcsr_capability_mask(&initial_fp_state);
}

/*
 * Save thread`s FPU context.
 */
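/*
 * Expected to be called with interrupts disabled (asserted below).
 * CR0.TS is left set on exit, so the next FP/SSE use traps and the
 * correct state is reloaded lazily via fpnoextflt().
 */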
void
fpu_save_context(thread_t thread)
{
	struct x86_fx_thread_state *ifps;

	assert(ml_get_interrupts_enabled() == FALSE);
	ifps = (thread)->machine.ifps;
#if	DEBUG
	if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
		panic("ifps->fp_valid: %u\n", ifps->fp_valid);
	}
#endif
	if (ifps != 0 && (ifps->fp_valid == FALSE)) {
		/* Clear CR0.TS in preparation for the FP context save. In
		 * theory, this shouldn't be necessary since a live FPU should
		 * indicate that TS is clear. However, various routines
		 * (such as sendsig & sigreturn) manipulate TS directly.
		 */
		clear_ts();
		/* registers are in FPU - save to memory */
		fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss)));
		ifps->fp_valid = TRUE;
	}
	set_ts();
}


/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 */
void
fpu_free(void *fps)
{
	fp_state_free(fps);
}

/*
 * Set the floating-point state for a thread based
 * on the FXSave formatted data. This is basically
 * the same as fpu_set_state except it uses the
 * expanded data structure.
 * If the thread is not the current thread, it is
 * not running (held).  Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_set_fxstate(
	thread_t	thr_act,
	thread_state_t	tstate,
	thread_flavor_t f)
{
	struct x86_fx_thread_state *ifps;
	struct x86_fx_thread_state *new_ifps;
	x86_float_state64_t	*state;
	pcb_t	pcb;
	size_t	state_size = sizeof(struct x86_fx_thread_state);
	boolean_t	old_valid;
	if (fp_kind == FP_NO)
	    return KERN_FAILURE;

	if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
	    !ml_fpu_avx_enabled())
	    return KERN_FAILURE;

	state = (x86_float_state64_t *)tstate;

	assert(thr_act != THREAD_NULL);
	pcb = THREAD_TO_PCB(thr_act);

	if (state == NULL) {
	    /*
	     * new FPU state is 'invalid'.
	     * Deallocate the fp state if it exists.
	     */
	    simple_lock(&pcb->lock);

		ifps = pcb->ifps;
		pcb->ifps = 0;

	    simple_unlock(&pcb->lock);

		if (ifps != 0)
			fp_state_free(ifps);
	} else {
	    /*
	     * Valid state.  Allocate the fp state if there is none.
	     */
	    new_ifps = 0;
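	    /* fp_state_alloc() may block, so allocate outside the pcb lock and retry */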
	Retry:
	    simple_lock(&pcb->lock);

		ifps = pcb->ifps;
	    if (ifps == 0) {
		if (new_ifps == 0) {
		    simple_unlock(&pcb->lock);
		    new_ifps = fp_state_alloc();
		    goto Retry;
		}
		ifps = new_ifps;
		new_ifps = 0;
		pcb->ifps = ifps;
	    }
	    /*
	     * now copy over the new data.
	     */
	    old_valid = ifps->fp_valid;

#if	DEBUG
	    if ((old_valid == FALSE) && (thr_act != current_thread())) {
		    panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
	    }
#endif
	    /*
	     * Clear any reserved bits in the MXCSR to prevent a GPF
	     * when issuing an FXRSTOR.
	     */

	    state->fpu_mxcsr &= mxcsr_capability_mask;

	    bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);

	    if (fpu_YMM_present) {
		struct x86_avx_thread_state *iavx = (void *) ifps;
		uint32_t fpu_nyreg = 0;

		if (f == x86_AVX_STATE32)
			fpu_nyreg = 8;
		else if (f == x86_AVX_STATE64)
			fpu_nyreg = 16;

		if (fpu_nyreg) {
			x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
			bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG));
		}

		iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
		/* Sanitize XSAVE header */
		bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
		if (fpu_nyreg)
			iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87);
		else
			iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87);
	    }
	    else
		ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
	    ifps->fp_valid = old_valid;

	    if (old_valid == FALSE) {
		    boolean_t istate = ml_set_interrupts_enabled(FALSE);
		    ifps->fp_valid = TRUE;
		    set_ts();
		    ml_set_interrupts_enabled(istate);
	    }

	    simple_unlock(&pcb->lock);

	    if (new_ifps != 0)
		fp_state_free(new_ifps);
	}
	return KERN_SUCCESS;
}

/*
 * Get the floating-point state for a thread.
 * If the thread is not the current thread, it is
 * not running (held).  Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_get_fxstate(
	thread_t	thr_act,
	thread_state_t	tstate,
	thread_flavor_t f)
{
	struct x86_fx_thread_state	*ifps;
	x86_float_state64_t	*state;
	kern_return_t	ret = KERN_FAILURE;
	pcb_t	pcb;
	size_t	state_size = sizeof(struct x86_fx_thread_state);

	if (fp_kind == FP_NO)
		return KERN_FAILURE;

	if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
	    !ml_fpu_avx_enabled())
		return KERN_FAILURE;

	state = (x86_float_state64_t *)tstate;

	assert(thr_act != THREAD_NULL);
	pcb = THREAD_TO_PCB(thr_act);

	simple_lock(&pcb->lock);

	ifps = pcb->ifps;
	if (ifps == 0) {
		/*
		 * No valid floating-point state.
		 */

		bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw,
		    state_size);

		simple_unlock(&pcb->lock);

		return KERN_SUCCESS;
	}
	/*
	 * Make sure we`ve got the latest fp state info:
	 * if the live fpu state belongs to our target, save it first.
	 */
	if (thr_act == current_thread()) {
		boolean_t	intr;

		intr = ml_set_interrupts_enabled(FALSE);

		clear_ts();
		fp_save(thr_act);
		clear_fpu();

		(void)ml_set_interrupts_enabled(intr);
	}
	if (ifps->fp_valid) {
		bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
		if (fpu_YMM_present) {
			struct x86_avx_thread_state *iavx = (void *) ifps;
			uint32_t fpu_nyreg = 0;

			if (f == x86_AVX_STATE32)
				fpu_nyreg = 8;
			else if (f == x86_AVX_STATE64)
				fpu_nyreg = 16;

			if (fpu_nyreg) {
				x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
				bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG));
			}
		}

		ret = KERN_SUCCESS;
	}
	simple_unlock(&pcb->lock);

	return ret;
}



/*
 * The child thread is 'stopped' with the thread mutex held and is not
 * yet known to anyone else, so there is no way for its fpu state to be
 * manipulated by an outside agency -> no need for the pcb lock.
 */

void
fpu_dup_fxstate(
	thread_t	parent,
	thread_t	child)
{
	struct x86_fx_thread_state *new_ifps = NULL;
	boolean_t	intr;
	pcb_t		ppcb;

	ppcb = THREAD_TO_PCB(parent);

	if (ppcb->ifps == NULL)
	        return;

        if (child->machine.ifps)
	        panic("fpu_dup_fxstate: child's ifps non-null");

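	/* Allocate the child's save area before taking the lock; zalloc may block */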
	new_ifps = fp_state_alloc();

	simple_lock(&ppcb->lock);

	if (ppcb->ifps != NULL) {
		struct x86_fx_thread_state *ifps = ppcb->ifps;
	        /*
		 * Make sure we`ve got the latest fp state info
		 */
	        intr = ml_set_interrupts_enabled(FALSE);
		assert(current_thread() == parent);
		clear_ts();
		fp_save(parent);
		clear_fpu();

		(void)ml_set_interrupts_enabled(intr);

		if (ifps->fp_valid) {
			child->machine.ifps = new_ifps;
			assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
			    (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
			bcopy((char *)(ppcb->ifps),
			    (char *)(child->machine.ifps), fp_register_state_size);

			/* Mark the new fp saved state as non-live. */
			/* Temporarily disabled: radar 4647827
			 * new_ifps->fp_valid = TRUE;
			 */

			/*
			 * Clear any reserved bits in the MXCSR to prevent a GPF
			 * when issuing an FXRSTOR.
			 */
			new_ifps->fx_MXCSR &= mxcsr_capability_mask;
			new_ifps = NULL;
		}
	}
	simple_unlock(&ppcb->lock);

	if (new_ifps != NULL)
	        fp_state_free(new_ifps);
}


/*
 * Initialize FPU.
 *
 */

void
fpinit(void)
{
	unsigned short	control;

	clear_ts();
	fninit();
	fnstcw(&control);
	control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
	control |= (FPC_PC_64 |		/* Set precision */
			FPC_RC_RN | 	/* round-to-nearest */
			FPC_ZE |	/* Suppress zero-divide */
			FPC_OE |	/*  and overflow */
			FPC_UE |	/*  underflow */
			FPC_IE |	/* Allow NaNQs and +-INF */
			FPC_DE |	/* Allow denorms as operands  */
			FPC_PE);	/* No trap for precision loss */
	fldcw(control);

	/* Initialize SSE/SSE2: MXCSR reset default, all exceptions masked, round-to-nearest */
	__builtin_ia32_ldmxcsr(0x1f80);
}

/*
 * Coprocessor not present (device-not-available fault):
 * lazily allocate and load this thread`s FP state.
 */

void
fpnoextflt(void)
{
	boolean_t	intr;
	thread_t	thr_act;
	pcb_t		pcb;
	struct x86_fx_thread_state *ifps = 0;

	thr_act = current_thread();
	pcb = THREAD_TO_PCB(thr_act);

	assert(fp_register_state_size != 0);

	if (pcb->ifps == 0 && !get_interrupt_level()) {
	        ifps = fp_state_alloc();
		bcopy((char *)&initial_fp_state, (char *)ifps,
		    fp_register_state_size);
		if (!thread_is_64bit(thr_act)) {
			ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
		}
		else
			ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64;
		ifps->fp_valid = TRUE;
	}
	intr = ml_set_interrupts_enabled(FALSE);

	clear_ts();			/*  Enable FPU use */

	if (__improbable(get_interrupt_level())) {
		/*
		 * Save current coprocessor context if valid
		 * Initialize coprocessor live context
		 */
		fp_save(thr_act);
		fpinit();
	} else {
	        if (pcb->ifps == 0) {
		        pcb->ifps = ifps;
			ifps = 0;
		}
		/*
		 * Load this thread`s state into coprocessor live context.
		 */
		fp_load(thr_act);
	}
	(void)ml_set_interrupts_enabled(intr);

	if (ifps)
	        fp_state_free(ifps);
}

/*
 * FPU overran end of segment.
 * Re-initialize FPU.  Floating point state is not valid.
 */

void
fpextovrflt(void)
{
	thread_t	thr_act = current_thread();
	pcb_t		pcb;
	struct x86_fx_thread_state *ifps;
	boolean_t	intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("FPU segment overrun exception  at interrupt context\n");
	if (current_task() == kernel_task)
		panic("FPU segment overrun exception in kernel thread context\n");

	/*
	 * This is a non-recoverable error.
	 * Invalidate the thread`s FPU state.
	 */
	pcb = THREAD_TO_PCB(thr_act);
	simple_lock(&pcb->lock);
	ifps = pcb->ifps;
	pcb->ifps = 0;
	simple_unlock(&pcb->lock);

	/*
	 * Re-initialize the FPU.
	 */
	clear_ts();
	fninit();

	/*
	 * And disable access.
	 */
	clear_fpu();

	(void)ml_set_interrupts_enabled(intr);

	if (ifps)
	    zfree(ifps_zone, ifps);

	/*
	 * Raise exception.
	 */
	i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
	/*NOTREACHED*/
}

/*
 * FPU error. Called by AST.
 */

void
fpexterrflt(void)
{
	thread_t	thr_act = current_thread();
	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
	boolean_t	intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("FPU error exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("FPU error exception in kernel thread context\n");

	/*
	 * Save the FPU state and turn off the FPU.
	 */
	fp_save(thr_act);

	(void)ml_set_interrupts_enabled(intr);

	/*
	 * Raise FPU exception.
	 * Locking not needed on pcb->ifps,
	 * since thread is running.
	 */
	i386_exception(EXC_ARITHMETIC,
		       EXC_I386_EXTERR,
		       ifps->fx_status);

	/*NOTREACHED*/
}

/*
 * Save FPU state.
 *
 * Locking not needed:
 * .	if called from fpu_get_state, pcb already locked.
 * .	if called from fpnoextflt or fp_intr, we are single-cpu
 * .	otherwise, thread is running.
 * N.B.: Must be called with interrupts disabled
 */

void
fp_save(
	thread_t	thr_act)
{
	pcb_t pcb = THREAD_TO_PCB(thr_act);
	struct x86_fx_thread_state *ifps = pcb->ifps;

	assert(ifps != 0);
	if (ifps != 0 && !ifps->fp_valid) {
		assert((get_cr0() & CR0_TS) == 0);
		/* registers are in FPU */
		ifps->fp_valid = TRUE;
		fpu_store_registers(ifps, thread_is_64bit(thr_act));
	}
}

/*
 * Restore FPU state from PCB.
 *
 * Locking not needed; always called on the current thread.
 */

void
fp_load(
	thread_t	thr_act)
{
	pcb_t pcb = THREAD_TO_PCB(thr_act);
	struct x86_fx_thread_state *ifps = pcb->ifps;

	assert(ifps);
	assert(ifps->fp_valid == FALSE || ifps->fp_valid == TRUE);

	if (ifps->fp_valid == FALSE) {
		fpinit();
	} else {
		fpu_load_registers(ifps);
	}
	ifps->fp_valid = FALSE;		/* in FPU */
}

/*
 * SSE arithmetic exception handling code.
 * Basically the same as the x87 exception handler with a different subtype
 */

void
fpSSEexterrflt(void)
{
	thread_t	thr_act = current_thread();
	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
	boolean_t	intr;

	intr = ml_set_interrupts_enabled(FALSE);

	if (get_interrupt_level())
		panic("SSE exception at interrupt context\n");
	if (current_task() == kernel_task)
		panic("SSE exception in kernel thread context\n");

	/*
	 * Save the FPU state and turn off the FPU.
	 */
	fp_save(thr_act);

	(void)ml_set_interrupts_enabled(intr);
	/*
	 * Raise FPU exception.
	 * Locking not needed on pcb->ifps,
	 * since thread is running.
	 */
	assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
	i386_exception(EXC_ARITHMETIC,
		       EXC_I386_SSEEXTERR,
		       ifps->fx_MXCSR);
	/*NOTREACHED*/
}

void
fp_setvalid(boolean_t value) {
        thread_t	thr_act = current_thread();
	struct x86_fx_thread_state *ifps = thr_act->machine.ifps;

	if (ifps) {
	        ifps->fp_valid = value;

		if (value == TRUE) {
			boolean_t istate = ml_set_interrupts_enabled(FALSE);
		        clear_fpu();
			ml_set_interrupts_enabled(istate);
		}
	}
}

boolean_t
ml_fpu_avx_enabled(void) {
	return (fpu_YMM_present == TRUE);
}