/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1992-1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

#include <mach/exception_types.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/fp_reg.h>
#include <mach/branch_predicates.h>

#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/assert.h>

#include <libkern/OSAtomic.h>

#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/proc_reg.h>
#include <i386/misc_protos.h>
#include <i386/thread.h>
#include <i386/trap.h>

int         fp_kind = FP_NO;    /* not inited */
zone_t      ifps_zone;          /* zone for FPU save area */

#define ALIGNED(addr,size)  (((uintptr_t)(addr)&((size)-1))==0)

/* Forward */

extern void fpinit(void);
extern void fp_save(thread_t thr_act);
extern void fp_load(thread_t thr_act);

static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps);

struct x86_avx_thread_state initial_fp_state __attribute((aligned(64)));

/* Global MXCSR capability bitmask */
static unsigned int mxcsr_capability_mask;

#define fninit() \
    __asm__ volatile("fninit")

#define fnstcw(control) \
    __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))

#define fldcw(control) \
    __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)))

#define fnclex() \
    __asm__ volatile("fnclex")

#define fnsave(state) \
    __asm__ volatile("fnsave %0" : "=m" (*state))

#define frstor(state) \
    __asm__ volatile("frstor %0" : : "m" (state))

#define fwait() \
    __asm__("fwait");

#define fxrstor(addr)   __asm__ __volatile__("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr)    __asm__ __volatile__("fxsave %0" : "=m" (*(addr)))

static uint32_t fp_register_state_size = 0;
static uint32_t fpu_YMM_present = FALSE;
static uint32_t cpuid_reevaluated = 0;

static void fpu_store_registers(void *, boolean_t);
static void fpu_load_registers(void *);

extern void xsave64o(void);
extern void xrstor64o(void);

#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM))

static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) {
    __asm__ __volatile__("xsetbv" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0));
}

static inline void xsave(struct x86_fx_thread_state *a) {
    __asm__ __volatile__("xsave %0" : "=m" (*a) : "a"(XMASK), "d"(0));
}

static inline void xrstor(struct x86_fx_thread_state *a) {
    __asm__ __volatile__("xrstor %0" :: "m" (*a), "a"(XMASK), "d"(0));
}

#if DEBUG
static inline unsigned short
fnstsw(void)
{
    unsigned short status;
    __asm__ volatile("fnstsw %0" : "=ma" (status));
    return(status);
}
#endif

/*
 * Configure the initial FPU state presented to new threads.
 * Determine the MXCSR capability mask, which allows us to mask off any
 * potentially unsafe "reserved" bits before restoring the FPU context.
 * *Not* per-cpu, assumes symmetry.
 */
static void
configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps)
{
    /* XSAVE requires a 64 byte aligned store */
    assert(ALIGNED(fps, 64));
    /* Clear, to prepare for the diagnostic FXSAVE */
    bzero(fps, sizeof(*fps));

    fpinit();
    fpu_store_registers(fps, FALSE);

    mxcsr_capability_mask = fps->fx_MXCSR_MASK;

    /* Set default mask value if necessary */
    if (mxcsr_capability_mask == 0)
        mxcsr_capability_mask = 0xffbf;

    /* Clear vector register store */
    bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg));
    bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg));

    fps->fp_valid = TRUE;
    fps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
    fpu_load_registers(fps);

    /* Poison values to trap unsafe usage */
    fps->fp_valid = 0xFFFFFFFF;
    fps->fp_save_layout = FP_UNUSED;

    /* Re-enable FPU/SSE DNA exceptions */
    set_ts();
}
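
/*
 * Illustrative sketch: the mask captured above is what fpu_set_fxstate()
 * and fpu_dup_fxstate() later apply to incoming state, roughly
 *
 *     state->fpu_mxcsr &= mxcsr_capability_mask;
 *
 * so that reserved MXCSR bits can never reach FXRSTOR/XRSTOR, where they
 * would otherwise raise a GP fault.
 */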

/*
 * Look for FPU and initialize it.
 * Called on each CPU.
 */
void
init_fpu(void)
{
#if DEBUG
    unsigned short status;
    unsigned short control;
#endif
    /*
     * Check for FPU by initializing it,
     * then trying to read the correct bit patterns from
     * the control and status registers.
     */
    set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE);   /* allow use of FPU */
    fninit();
#if DEBUG
    status = fnstsw();
    fnstcw(&control);

    assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f));
#endif
    /* Advertise SSE support */
    if (cpuid_features() & CPUID_FEATURE_FXSR) {
        fp_kind = FP_FXSR;
        set_cr4(get_cr4() | CR4_OSFXS);
        /* And allow SIMD exceptions if present */
        if (cpuid_features() & CPUID_FEATURE_SSE) {
            set_cr4(get_cr4() | CR4_OSXMM);
        }
        fp_register_state_size = sizeof(struct x86_fx_thread_state);
    } else
        panic("fpu is not FP_FXSR");

    /* Configure the XSAVE context mechanism if the processor supports
     * AVX/YMM registers
     */
    if (cpuid_features() & CPUID_FEATURE_XSAVE) {
        cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
        if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
            assert(xsp->extended_state[0] & (uint32_t)XFEM_SSE);
            /* XSAVE container size for all features */
            assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state));
            fp_register_state_size = sizeof(struct x86_avx_thread_state);
            fpu_YMM_present = TRUE;
            set_cr4(get_cr4() | CR4_OSXSAVE);
            xsetbv(0, XMASK);
            /* Re-evaluate CPUID, once, to reflect OSXSAVE */
            if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
                cpuid_set_info();
            /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
        }
    } else
        fpu_YMM_present = FALSE;

    fpinit();

    /*
     * Trap wait instructions.  Turn off FPU for now.
     */
    set_cr0(get_cr0() | CR0_TS | CR0_MP);
}

/*
 * Allocate and initialize FP state for current thread.
 * Don't load state.
 */
static void *
fp_state_alloc(void)
{
    struct x86_fx_thread_state *ifps = zalloc(ifps_zone);

#if DEBUG
    if (!(ALIGNED(ifps,64))) {
        panic("fp_state_alloc: %p, %u, %p, %u", ifps,
            (unsigned) ifps_zone->elem_size,
            (void *) ifps_zone->free_elements,
            (unsigned) ifps_zone->alloc_size);
    }
#endif
    bzero(ifps, sizeof(*ifps));

    return ifps;
}

static inline void
fp_state_free(void *ifps)
{
    zfree(ifps_zone, ifps);
}

void clear_fpu(void)
{
    set_ts();
}

static void fpu_load_registers(void *fstate) {
    struct x86_fx_thread_state *ifps = fstate;
    fp_save_layout_t layout = ifps->fp_save_layout;

    assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64);
    assert(ALIGNED(ifps, 64));
    assert(ml_get_interrupts_enabled() == FALSE);

#if DEBUG
    if (layout == XSAVE32 || layout == XSAVE64) {
        struct x86_avx_thread_state *iavx = fstate;
        unsigned i;
        /* Verify reserved bits in the XSAVE header */
        if (iavx->_xh.xsbv & ~7)
            panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv);
        for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++)
            if (iavx->_xh.xhrsvd[i])
                panic("Reserved bit set");
    }
    if (fpu_YMM_present) {
        if (layout != XSAVE32 && layout != XSAVE64)
            panic("Inappropriate layout: %u\n", layout);
    }
#endif /* DEBUG */

    if ((layout == XSAVE64) || (layout == XSAVE32))
        xrstor(ifps);
    else
        fxrstor(ifps);
}

static void fpu_store_registers(void *fstate, boolean_t is64) {
    struct x86_fx_thread_state *ifps = fstate;

    assert(ALIGNED(ifps, 64));

    if (fpu_YMM_present) {
        xsave(ifps);
        ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32;
    } else {
        fxsave(ifps);
        ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32;
    }
}
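
/*
 * In outline, the two helpers above pair each save area with the
 * instruction used to fill it, so the restore path can dispatch on
 * fp_save_layout:
 *
 *     FXSAVE32 / FXSAVE64  ->  fxrstor
 *     XSAVE32  / XSAVE64   ->  xrstor  (only when fpu_YMM_present)
 *
 * The 32/64 variant simply records thread_is_64bit() at save time.
 */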

/*
 * Initialize FP handling.
 */
void
fpu_module_init(void)
{
    if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) &&
        (fp_register_state_size != sizeof(struct x86_avx_thread_state)))
        panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size);

    assert(fpu_YMM_present != 0xFFFFFFFF);

    /* We explicitly choose an allocation size of 64
     * to eliminate waste for the 832 byte sized
     * AVX XSAVE register save area.
     */
    ifps_zone = zinit(fp_register_state_size,
                      thread_max * fp_register_state_size,
                      64 * fp_register_state_size,
                      "x86 fpsave state");

    /* To maintain the required alignment, disable
     * zone debugging for this zone as that appends
     * 16 bytes to each element.
     */
    zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE);

    /* Determine MXCSR reserved bits and configure initial FPU state */
    configure_mxcsr_capability_mask(&initial_fp_state);
}

/*
 * Save thread`s FPU context.
 */
void
fpu_save_context(thread_t thread)
{
    struct x86_fx_thread_state *ifps;

    assert(ml_get_interrupts_enabled() == FALSE);
    ifps = (thread)->machine.ifps;
#if DEBUG
    if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
        panic("ifps->fp_valid: %u\n", ifps->fp_valid);
    }
#endif
    if (ifps != 0 && (ifps->fp_valid == FALSE)) {
        /* Clear CR0.TS in preparation for the FP context save. In
         * theory, this shouldn't be necessary since a live FPU should
         * indicate that TS is clear. However, various routines
         * (such as sendsig & sigreturn) manipulate TS directly.
         */
        clear_ts();
        /* registers are in FPU - save to memory */
        fpu_store_registers(ifps, (thread_is_64bit(thread) &&
            is_saved_state64(thread->machine.iss)));
        ifps->fp_valid = TRUE;
    }
    set_ts();
}

/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 */
void
fpu_free(void *fps)
{
    fp_state_free(fps);
}
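
/*
 * Illustrative sketch of the allocation pattern used below: the pcb lock
 * is dropped around fp_state_alloc(), which may block in zalloc(), and the
 * lookup is retried afterwards:
 *
 *     Retry:
 *         simple_lock(&pcb->lock);
 *         if (pcb->ifps == 0 && new_ifps == 0) {
 *             simple_unlock(&pcb->lock);
 *             new_ifps = fp_state_alloc();
 *             goto Retry;
 *         }
 *
 * A spare allocation that loses the race is freed once the lock is dropped.
 */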

/*
 * Set the floating-point state for a thread based
 * on the FXSave formatted data. This is basically
 * the same as fpu_set_state except it uses the
 * expanded data structure.
 * If the thread is not the current thread, it is
 * not running (held).  Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_set_fxstate(
    thread_t        thr_act,
    thread_state_t  tstate,
    thread_flavor_t f)
{
    struct x86_fx_thread_state *ifps;
    struct x86_fx_thread_state *new_ifps;
    x86_float_state64_t *state;
    pcb_t       pcb;
    size_t      state_size = sizeof(struct x86_fx_thread_state);
    boolean_t   old_valid, fresh_state = FALSE;

    if (fp_kind == FP_NO)
        return KERN_FAILURE;

    if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
        !ml_fpu_avx_enabled())
        return KERN_FAILURE;

    state = (x86_float_state64_t *)tstate;

    assert(thr_act != THREAD_NULL);
    pcb = THREAD_TO_PCB(thr_act);

    if (state == NULL) {
        /*
         * new FPU state is 'invalid'.
         * Deallocate the fp state if it exists.
         */
        simple_lock(&pcb->lock);

        ifps = pcb->ifps;
        pcb->ifps = 0;

        simple_unlock(&pcb->lock);

        if (ifps != 0) {
            fp_state_free(ifps);
        }
    } else {
        /*
         * Valid incoming state. Allocate the fp state if there is none.
         */
        new_ifps = 0;
    Retry:
        simple_lock(&pcb->lock);

        ifps = pcb->ifps;
        if (ifps == 0) {
            if (new_ifps == 0) {
                simple_unlock(&pcb->lock);
                new_ifps = fp_state_alloc();
                goto Retry;
            }
            ifps = new_ifps;
            new_ifps = 0;
            pcb->ifps = ifps;
            fresh_state = TRUE;
        }

        /*
         * now copy over the new data.
         */
        old_valid = ifps->fp_valid;

#if DEBUG || DEVELOPMENT
        if ((fresh_state == FALSE) && (old_valid == FALSE) && (thr_act != current_thread())) {
            panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
        }
#endif
        /*
         * Clear any reserved bits in the MXCSR to prevent a GPF
         * when issuing an FXRSTOR.
         */
        state->fpu_mxcsr &= mxcsr_capability_mask;

        bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);

        if (fpu_YMM_present) {
            struct x86_avx_thread_state *iavx = (void *) ifps;
            uint32_t fpu_nyreg = 0;

            if (f == x86_AVX_STATE32)
                fpu_nyreg = 8;
            else if (f == x86_AVX_STATE64)
                fpu_nyreg = 16;

            if (fpu_nyreg) {
                x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
                bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG));
            }

            iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
            /* Sanitize XSAVE header */
            bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
            if (fpu_nyreg)
                iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87);
            else
                iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87);
        } else {
            ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
        }
        ifps->fp_valid = old_valid;

        if (old_valid == FALSE) {
            boolean_t istate = ml_set_interrupts_enabled(FALSE);
            ifps->fp_valid = TRUE;
            /* If altering the current thread's state, disable FPU */
            if (thr_act == current_thread())
                set_ts();
            ml_set_interrupts_enabled(istate);
        }

        simple_unlock(&pcb->lock);

        if (new_ifps != 0)
            fp_state_free(new_ifps);
    }
    return KERN_SUCCESS;
}
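
/*
 * Note on the AVX flavors handled above and below: the x_YMMH_reg area
 * holds only the upper 128 bits of each YMM register; the lower halves
 * travel in the XMM slots of the FXSAVE-format portion of the save area.
 * A 32-bit flavor carries 8 such registers and a 64-bit flavor 16, which
 * is where the fpu_nyreg values of 8 and 16 come from.
 */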

/*
 * Get the floating-point state for a thread.
 * If the thread is not the current thread, it is
 * not running (held).  Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_get_fxstate(
    thread_t        thr_act,
    thread_state_t  tstate,
    thread_flavor_t f)
{
    struct x86_fx_thread_state *ifps;
    x86_float_state64_t *state;
    kern_return_t ret = KERN_FAILURE;
    pcb_t   pcb;
    size_t  state_size = sizeof(struct x86_fx_thread_state);

    if (fp_kind == FP_NO)
        return KERN_FAILURE;

    if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
        !ml_fpu_avx_enabled())
        return KERN_FAILURE;

    state = (x86_float_state64_t *)tstate;

    assert(thr_act != THREAD_NULL);
    pcb = THREAD_TO_PCB(thr_act);

    simple_lock(&pcb->lock);

    ifps = pcb->ifps;
    if (ifps == 0) {
        /*
         * No valid floating-point state.
         */
        bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw,
            state_size);

        simple_unlock(&pcb->lock);

        return KERN_SUCCESS;
    }
    /*
     * Make sure we`ve got the latest fp state info
     * If the live fpu state belongs to our target
     */
    if (thr_act == current_thread()) {
        boolean_t intr;

        intr = ml_set_interrupts_enabled(FALSE);

        clear_ts();
        fp_save(thr_act);
        clear_fpu();

        (void)ml_set_interrupts_enabled(intr);
    }
    if (ifps->fp_valid) {
        bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
        if (fpu_YMM_present) {
            struct x86_avx_thread_state *iavx = (void *) ifps;
            uint32_t fpu_nyreg = 0;

            if (f == x86_AVX_STATE32)
                fpu_nyreg = 8;
            else if (f == x86_AVX_STATE64)
                fpu_nyreg = 16;

            if (fpu_nyreg) {
                x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
                bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG));
            }
        }

        ret = KERN_SUCCESS;
    }
    simple_unlock(&pcb->lock);

    return ret;
}

/*
 * the child thread is 'stopped' with the thread
 * mutex held and is currently not known by anyone
 * so no way for fpu state to get manipulated by an
 * outside agency -> no need for pcb lock
 */
void
fpu_dup_fxstate(
    thread_t parent,
    thread_t child)
{
    struct x86_fx_thread_state *new_ifps = NULL;
    boolean_t intr;
    pcb_t     ppcb;

    ppcb = THREAD_TO_PCB(parent);

    if (ppcb->ifps == NULL)
        return;

    if (child->machine.ifps)
        panic("fpu_dup_fxstate: child's ifps non-null");

    new_ifps = fp_state_alloc();

    simple_lock(&ppcb->lock);

    if (ppcb->ifps != NULL) {
        struct x86_fx_thread_state *ifps = ppcb->ifps;
        /*
         * Make sure we`ve got the latest fp state info
         */
        intr = ml_set_interrupts_enabled(FALSE);
        assert(current_thread() == parent);
        clear_ts();
        fp_save(parent);
        clear_fpu();

        (void)ml_set_interrupts_enabled(intr);

        if (ifps->fp_valid) {
            child->machine.ifps = new_ifps;
            assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
                (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
            bcopy((char *)(ppcb->ifps),
                (char *)(child->machine.ifps), fp_register_state_size);

            /* Mark the new fp saved state as non-live. */
            /* Temporarily disabled: radar 4647827
             * new_ifps->fp_valid = TRUE;
             */

            /*
             * Clear any reserved bits in the MXCSR to prevent a GPF
             * when issuing an FXRSTOR.
             */
            new_ifps->fx_MXCSR &= mxcsr_capability_mask;
            new_ifps = NULL;
        }
    }
    simple_unlock(&ppcb->lock);

    if (new_ifps != NULL)
        fp_state_free(new_ifps);
}

/*
 * Initialize FPU.
 *
 */
void
fpinit(void)
{
    unsigned short control;

    clear_ts();
    fninit();
    fnstcw(&control);
    control &= ~(FPC_PC|FPC_RC);   /* Clear precision & rounding control */
    control |= (FPC_PC_64 |        /* Set precision */
                FPC_RC_RN |        /* round-to-nearest */
                FPC_ZE |           /* Suppress zero-divide */
                FPC_OE |           /*  and overflow */
                FPC_UE |           /*  underflow */
                FPC_IE |           /* Allow NaNQs and +-INF */
                FPC_DE |           /* Allow denorms as operands */
                FPC_PE);           /* No trap for precision loss */
    fldcw(control);

    /* Initialize SSE/SSE2 */
    __builtin_ia32_ldmxcsr(0x1f80);
}
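
/*
 * FPU context is switched lazily: fpu_save_context() leaves CR0.TS set
 * when a thread stops running, so the next floating-point instruction
 * takes a device-not-available trap.  That trap is serviced by
 * fpnoextflt() below, which clears TS and loads the thread's saved state
 * (or a copy of initial_fp_state on first use).
 */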

/*
 * Coprocessor not present.
 */
void
fpnoextflt(void)
{
    boolean_t intr;
    thread_t  thr_act;
    pcb_t     pcb;
    struct x86_fx_thread_state *ifps = 0;

    thr_act = current_thread();
    pcb = THREAD_TO_PCB(thr_act);

    assert(fp_register_state_size != 0);

    if (pcb->ifps == 0 && !get_interrupt_level()) {
        ifps = fp_state_alloc();
        bcopy((char *)&initial_fp_state, (char *)ifps,
            fp_register_state_size);
        if (!thread_is_64bit(thr_act)) {
            ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
        }
        else
            ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64;
        ifps->fp_valid = TRUE;
    }
    intr = ml_set_interrupts_enabled(FALSE);

    clear_ts();             /*  Enable FPU use */

    if (__improbable(get_interrupt_level())) {
        /*
         * Save current coprocessor context if valid
         * Initialize coprocessor live context
         */
        fp_save(thr_act);
        fpinit();
    } else {
        if (pcb->ifps == 0) {
            pcb->ifps = ifps;
            ifps = 0;
        }
        /*
         * Load this thread`s state into coprocessor live context.
         */
        fp_load(thr_act);
    }
    (void)ml_set_interrupts_enabled(intr);

    if (ifps)
        fp_state_free(ifps);
}

/*
 * FPU overran end of segment.
 * Re-initialize FPU.  Floating point state is not valid.
 */
void
fpextovrflt(void)
{
    thread_t  thr_act = current_thread();
    pcb_t     pcb;
    struct x86_fx_thread_state *ifps;
    boolean_t intr;

    intr = ml_set_interrupts_enabled(FALSE);

    if (get_interrupt_level())
        panic("FPU segment overrun exception at interrupt context\n");
    if (current_task() == kernel_task)
        panic("FPU segment overrun exception in kernel thread context\n");

    /*
     * This is a non-recoverable error.
     * Invalidate the thread`s FPU state.
     */
    pcb = THREAD_TO_PCB(thr_act);
    simple_lock(&pcb->lock);
    ifps = pcb->ifps;
    pcb->ifps = 0;
    simple_unlock(&pcb->lock);

    /*
     * Re-initialize the FPU.
     */
    clear_ts();
    fninit();

    /*
     * And disable access.
     */
    clear_fpu();

    (void)ml_set_interrupts_enabled(intr);

    if (ifps)
        zfree(ifps_zone, ifps);

    /*
     * Raise exception.
     */
    i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
    /*NOTREACHED*/
}

/*
 * FPU error. Called by AST.
 */
void
fpexterrflt(void)
{
    thread_t thr_act = current_thread();
    struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
    boolean_t intr;

    intr = ml_set_interrupts_enabled(FALSE);

    if (get_interrupt_level())
        panic("FPU error exception at interrupt context\n");
    if (current_task() == kernel_task)
        panic("FPU error exception in kernel thread context\n");

    /*
     * Save the FPU state and turn off the FPU.
     */
    fp_save(thr_act);

    (void)ml_set_interrupts_enabled(intr);

    /*
     * Raise FPU exception.
     * Locking not needed on pcb->ifps,
     * since thread is running.
     */
    i386_exception(EXC_ARITHMETIC,
                   EXC_I386_EXTERR,
                   ifps->fx_status);

    /*NOTREACHED*/
}

/*
 * Save FPU state.
 *
 * Locking not needed:
 * .    if called from fpu_get_state, pcb already locked.
 * .    if called from fpnoextflt or fp_intr, we are single-cpu
 * .    otherwise, thread is running.
 * N.B.: Must be called with interrupts disabled
 */
void
fp_save(
    thread_t thr_act)
{
    pcb_t pcb = THREAD_TO_PCB(thr_act);
    struct x86_fx_thread_state *ifps = pcb->ifps;

    assert(ifps != 0);
    if (ifps != 0 && !ifps->fp_valid) {
        assert((get_cr0() & CR0_TS) == 0);
        /* registers are in FPU */
        ifps->fp_valid = TRUE;
        fpu_store_registers(ifps, thread_is_64bit(thr_act));
    }
}

/*
 * Restore FPU state from PCB.
 *
 * Locking not needed; always called on the current thread.
 */
void
fp_load(
    thread_t thr_act)
{
    pcb_t pcb = THREAD_TO_PCB(thr_act);
    struct x86_fx_thread_state *ifps = pcb->ifps;

    assert(ifps);
#if DEBUG
    if (ifps->fp_valid != FALSE && ifps->fp_valid != TRUE) {
        panic("fp_load() invalid fp_valid: %u, fp_save_layout: %u\n",
              ifps->fp_valid, ifps->fp_save_layout);
    }
#endif
    if (ifps->fp_valid == FALSE) {
        fpinit();
    } else {
        fpu_load_registers(ifps);
    }
    ifps->fp_valid = FALSE;     /* in FPU */
}
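
/*
 * fp_valid records where the authoritative copy of a thread's FPU state
 * lives:
 *
 *     fp_valid == TRUE   ->  the memory save area is current
 *     fp_valid == FALSE  ->  the live FPU registers are current ("in FPU")
 *
 * fp_save() sets it to TRUE after storing the registers; fp_load() resets
 * it to FALSE once they have been reloaded.
 */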

/*
 * SSE arithmetic exception handling code.
 * Basically the same as the x87 exception handler with a different subtype
 */
void
fpSSEexterrflt(void)
{
    thread_t thr_act = current_thread();
    struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
    boolean_t intr;

    intr = ml_set_interrupts_enabled(FALSE);

    if (get_interrupt_level())
        panic("SSE exception at interrupt context\n");
    if (current_task() == kernel_task)
        panic("SSE exception in kernel thread context\n");

    /*
     * Save the FPU state and turn off the FPU.
     */
    fp_save(thr_act);

    (void)ml_set_interrupts_enabled(intr);
    /*
     * Raise FPU exception.
     * Locking not needed on pcb->ifps,
     * since thread is running.
     */
    i386_exception(EXC_ARITHMETIC,
                   EXC_I386_SSEEXTERR,
                   ifps->fx_MXCSR);
    /*NOTREACHED*/
}

void
fp_setvalid(boolean_t value) {
    thread_t thr_act = current_thread();
    struct x86_fx_thread_state *ifps = thr_act->machine.ifps;

    if (ifps) {
        ifps->fp_valid = value;

        if (value == TRUE) {
            boolean_t istate = ml_set_interrupts_enabled(FALSE);
            clear_fpu();
            ml_set_interrupts_enabled(istate);
        }
    }
}

boolean_t
ml_fpu_avx_enabled(void) {
    return (fpu_YMM_present == TRUE);
}
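
/*
 * Illustrative note: ml_fpu_avx_enabled() is the gate fpu_set_fxstate()
 * and fpu_get_fxstate() use to reject the AVX thread-state flavors on
 * hardware where init_fpu() did not detect XSAVE/YMM support, e.g.
 *
 *     if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
 *         !ml_fpu_avx_enabled())
 *         return KERN_FAILURE;
 */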