/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1992-1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.
CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58 59 60#include <mach/exception_types.h> 61#include <mach/i386/thread_status.h> 62#include <mach/i386/fp_reg.h> 63#include <mach/branch_predicates.h> 64 65#include <kern/mach_param.h> 66#include <kern/processor.h> 67#include <kern/thread.h> 68#include <kern/zalloc.h> 69#include <kern/misc_protos.h> 70#include <kern/spl.h> 71#include <kern/assert.h> 72 73#include <libkern/OSAtomic.h> 74 75#include <architecture/i386/pio.h> 76#include <i386/cpuid.h> 77#include <i386/fpu.h> 78#include <i386/proc_reg.h> 79#include <i386/misc_protos.h> 80#include <i386/thread.h> 81#include <i386/trap.h> 82 83int fp_kind = FP_NO; /* not inited */ 84zone_t ifps_zone; /* zone for FPU save area */ 85 86#define ALIGNED(addr,size) (((uintptr_t)(addr)&((size)-1))==0) 87 88/* Forward */ 89 90extern void fpinit(void); 91extern void fp_save( 92 thread_t thr_act); 93extern void fp_load( 94 thread_t thr_act); 95 96static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps); 97 98struct x86_avx_thread_state initial_fp_state __attribute((aligned(64))); 99 100 101/* Global MXCSR capability bitmask */ 102static unsigned int mxcsr_capability_mask; 103 104#define fninit() \ 105 __asm__ volatile("fninit") 106 107#define fnstcw(control) \ 108 __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control))) 109 110#define fldcw(control) \ 111 __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) ) 112 113#define fnclex() \ 114 __asm__ volatile("fnclex") 115 116#define 
fnsave(state) \ 117 __asm__ volatile("fnsave %0" : "=m" (*state)) 118 119#define frstor(state) \ 120 __asm__ volatile("frstor %0" : : "m" (state)) 121 122#define fwait() \ 123 __asm__("fwait"); 124 125#define fxrstor(addr) __asm__ __volatile__("fxrstor %0" : : "m" (*(addr))) 126#define fxsave(addr) __asm__ __volatile__("fxsave %0" : "=m" (*(addr))) 127 128static uint32_t fp_register_state_size = 0; 129static uint32_t fpu_YMM_present = FALSE; 130static uint32_t cpuid_reevaluated = 0; 131 132static void fpu_store_registers(void *, boolean_t); 133static void fpu_load_registers(void *); 134 135extern void xsave64o(void); 136extern void xrstor64o(void); 137 138#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM)) 139 140static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) { 141 __asm__ __volatile__("xsetbv" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0)); 142} 143 144static inline void xsave(struct x86_fx_thread_state *a) { 145 __asm__ __volatile__("xsave %0" :"=m" (*a) : "a"(XMASK), "d"(0)); 146} 147 148static inline void xrstor(struct x86_fx_thread_state *a) { 149 __asm__ __volatile__("xrstor %0" :: "m" (*a), "a"(XMASK), "d"(0)); 150} 151 152#if DEBUG 153static inline unsigned short 154fnstsw(void) 155{ 156 unsigned short status; 157 __asm__ volatile("fnstsw %0" : "=ma" (status)); 158 return(status); 159} 160#endif 161 162/* 163 * Configure the initial FPU state presented to new threads. 164 * Determine the MXCSR capability mask, which allows us to mask off any 165 * potentially unsafe "reserved" bits before restoring the FPU context. 166 * *Not* per-cpu, assumes symmetry. 
167 */ 168 169static void 170configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps) 171{ 172 /* XSAVE requires a 64 byte aligned store */ 173 assert(ALIGNED(fps, 64)); 174 /* Clear, to prepare for the diagnostic FXSAVE */ 175 bzero(fps, sizeof(*fps)); 176 177 fpinit(); 178 fpu_store_registers(fps, FALSE); 179 180 mxcsr_capability_mask = fps->fx_MXCSR_MASK; 181 182 /* Set default mask value if necessary */ 183 if (mxcsr_capability_mask == 0) 184 mxcsr_capability_mask = 0xffbf; 185 186 /* Clear vector register store */ 187 bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg)); 188 bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg)); 189 190 fps->fp_valid = TRUE; 191 fps->fp_save_layout = fpu_YMM_present ? XSAVE32: FXSAVE32; 192 fpu_load_registers(fps); 193 194 /* Poison values to trap unsafe usage */ 195 fps->fp_valid = 0xFFFFFFFF; 196 fps->fp_save_layout = FP_UNUSED; 197 198 /* Re-enable FPU/SSE DNA exceptions */ 199 set_ts(); 200} 201 202 203/* 204 * Look for FPU and initialize it. 205 * Called on each CPU. 206 */ 207void 208init_fpu(void) 209{ 210#if DEBUG 211 unsigned short status; 212 unsigned short control; 213#endif 214 /* 215 * Check for FPU by initializing it, 216 * then trying to read the correct bit patterns from 217 * the control and status registers. 
218 */ 219 set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */ 220 fninit(); 221#if DEBUG 222 status = fnstsw(); 223 fnstcw(&control); 224 225 assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f)); 226#endif 227 /* Advertise SSE support */ 228 if (cpuid_features() & CPUID_FEATURE_FXSR) { 229 fp_kind = FP_FXSR; 230 set_cr4(get_cr4() | CR4_OSFXS); 231 /* And allow SIMD exceptions if present */ 232 if (cpuid_features() & CPUID_FEATURE_SSE) { 233 set_cr4(get_cr4() | CR4_OSXMM); 234 } 235 fp_register_state_size = sizeof(struct x86_fx_thread_state); 236 237 } else 238 panic("fpu is not FP_FXSR"); 239 240 /* Configure the XSAVE context mechanism if the processor supports 241 * AVX/YMM registers 242 */ 243 if (cpuid_features() & CPUID_FEATURE_XSAVE) { 244 cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf; 245 if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) { 246 assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE); 247 /* XSAVE container size for all features */ 248 assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state)); 249 fp_register_state_size = sizeof(struct x86_avx_thread_state); 250 fpu_YMM_present = TRUE; 251 set_cr4(get_cr4() | CR4_OSXSAVE); 252 xsetbv(0, XMASK); 253 /* Re-evaluate CPUID, once, to reflect OSXSAVE */ 254 if (OSCompareAndSwap(0, 1, &cpuid_reevaluated)) 255 cpuid_set_info(); 256 /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */ 257 } 258 } 259 else 260 fpu_YMM_present = FALSE; 261 262 fpinit(); 263 264 /* 265 * Trap wait instructions. Turn off FPU for now. 266 */ 267 set_cr0(get_cr0() | CR0_TS | CR0_MP); 268} 269 270/* 271 * Allocate and initialize FP state for current thread. 272 * Don't load state. 
273 */ 274static void * 275fp_state_alloc(void) 276{ 277 struct x86_fx_thread_state *ifps = zalloc(ifps_zone); 278 279#if DEBUG 280 if (!(ALIGNED(ifps,64))) { 281 panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size); 282 } 283#endif 284 bzero(ifps, sizeof(*ifps)); 285 return ifps; 286} 287 288static inline void 289fp_state_free(void *ifps) 290{ 291 zfree(ifps_zone, ifps); 292} 293 294void clear_fpu(void) 295{ 296 set_ts(); 297} 298 299 300static void fpu_load_registers(void *fstate) { 301 struct x86_fx_thread_state *ifps = fstate; 302 fp_save_layout_t layout = ifps->fp_save_layout; 303 304 assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64); 305 assert(ALIGNED(ifps, 64)); 306 assert(ml_get_interrupts_enabled() == FALSE); 307 308#if DEBUG 309 if (layout == XSAVE32 || layout == XSAVE64) { 310 struct x86_avx_thread_state *iavx = fstate; 311 unsigned i; 312 /* Verify reserved bits in the XSAVE header*/ 313 if (iavx->_xh.xsbv & ~7) 314 panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv); 315 for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++) 316 if (iavx->_xh.xhrsvd[i]) 317 panic("Reserved bit set"); 318 } 319 if (fpu_YMM_present) { 320 if (layout != XSAVE32 && layout != XSAVE64) 321 panic("Inappropriate layout: %u\n", layout); 322 } 323#endif /* DEBUG */ 324 325 if ((layout == XSAVE64) || (layout == XSAVE32)) 326 xrstor(ifps); 327 else 328 fxrstor(ifps); 329} 330 331static void fpu_store_registers(void *fstate, boolean_t is64) { 332 struct x86_fx_thread_state *ifps = fstate; 333 assert(ALIGNED(ifps, 64)); 334 if (fpu_YMM_present) { 335 xsave(ifps); 336 ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32; 337 } 338 else { 339 fxsave(ifps); 340 ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32; 341 } 342} 343 344/* 345 * Initialize FP handling. 
346 */ 347 348void 349fpu_module_init(void) 350{ 351 if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) && 352 (fp_register_state_size != sizeof(struct x86_avx_thread_state))) 353 panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size); 354 355 assert(fpu_YMM_present != 0xFFFFFFFF); 356 357 /* We explicitly choose an allocation size of 64 358 * to eliminate waste for the 832 byte sized 359 * AVX XSAVE register save area. 360 */ 361 ifps_zone = zinit(fp_register_state_size, 362 thread_max * fp_register_state_size, 363 64 * fp_register_state_size, 364 "x86 fpsave state"); 365 366 /* To maintain the required alignment, disable 367 * zone debugging for this zone as that appends 368 * 16 bytes to each element. 369 */ 370 zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE); 371 /* Determine MXCSR reserved bits and configure initial FPU state*/ 372 configure_mxcsr_capability_mask(&initial_fp_state); 373} 374 375/* 376 * Save thread`s FPU context. 377 */ 378void 379fpu_save_context(thread_t thread) 380{ 381 struct x86_fx_thread_state *ifps; 382 383 assert(ml_get_interrupts_enabled() == FALSE); 384 ifps = (thread)->machine.ifps; 385#if DEBUG 386 if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) { 387 panic("ifps->fp_valid: %u\n", ifps->fp_valid); 388 } 389#endif 390 if (ifps != 0 && (ifps->fp_valid == FALSE)) { 391 /* Clear CR0.TS in preparation for the FP context save. In 392 * theory, this shouldn't be necessary since a live FPU should 393 * indicate that TS is clear. However, various routines 394 * (such as sendsig & sigreturn) manipulate TS directly. 395 */ 396 clear_ts(); 397 /* registers are in FPU - save to memory */ 398 fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss))); 399 ifps->fp_valid = TRUE; 400 } 401 set_ts(); 402} 403 404 405/* 406 * Free a FPU save area. 407 * Called only when thread terminating - no locking necessary. 
408 */ 409void 410fpu_free(void *fps) 411{ 412 fp_state_free(fps); 413} 414 415/* 416 * Set the floating-point state for a thread based 417 * on the FXSave formatted data. This is basically 418 * the same as fpu_set_state except it uses the 419 * expanded data structure. 420 * If the thread is not the current thread, it is 421 * not running (held). Locking needed against 422 * concurrent fpu_set_state or fpu_get_state. 423 */ 424kern_return_t 425fpu_set_fxstate( 426 thread_t thr_act, 427 thread_state_t tstate, 428 thread_flavor_t f) 429{ 430 struct x86_fx_thread_state *ifps; 431 struct x86_fx_thread_state *new_ifps; 432 x86_float_state64_t *state; 433 pcb_t pcb; 434 size_t state_size = sizeof(struct x86_fx_thread_state); 435 boolean_t old_valid, fresh_state = FALSE; 436 437 if (fp_kind == FP_NO) 438 return KERN_FAILURE; 439 440 if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) && 441 !ml_fpu_avx_enabled()) 442 return KERN_FAILURE; 443 444 state = (x86_float_state64_t *)tstate; 445 446 assert(thr_act != THREAD_NULL); 447 pcb = THREAD_TO_PCB(thr_act); 448 449 if (state == NULL) { 450 /* 451 * new FPU state is 'invalid'. 452 * Deallocate the fp state if it exists. 453 */ 454 simple_lock(&pcb->lock); 455 456 ifps = pcb->ifps; 457 pcb->ifps = 0; 458 459 simple_unlock(&pcb->lock); 460 461 if (ifps != 0) { 462 fp_state_free(ifps); 463 } 464 } else { 465 /* 466 * Valid incoming state. Allocate the fp state if there is none. 467 */ 468 new_ifps = 0; 469 Retry: 470 simple_lock(&pcb->lock); 471 472 ifps = pcb->ifps; 473 if (ifps == 0) { 474 if (new_ifps == 0) { 475 simple_unlock(&pcb->lock); 476 new_ifps = fp_state_alloc(); 477 goto Retry; 478 } 479 ifps = new_ifps; 480 new_ifps = 0; 481 pcb->ifps = ifps; 482 fresh_state = TRUE; 483 } 484 485 /* 486 * now copy over the new data. 
487 */ 488 489 old_valid = ifps->fp_valid; 490 491#if DEBUG || DEVELOPMENT 492 if ((fresh_state == FALSE) && (old_valid == FALSE) && (thr_act != current_thread())) { 493 panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act); 494 } 495#endif 496 /* 497 * Clear any reserved bits in the MXCSR to prevent a GPF 498 * when issuing an FXRSTOR. 499 */ 500 501 state->fpu_mxcsr &= mxcsr_capability_mask; 502 503 bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size); 504 505 if (fpu_YMM_present) { 506 struct x86_avx_thread_state *iavx = (void *) ifps; 507 uint32_t fpu_nyreg = 0; 508 509 if (f == x86_AVX_STATE32) 510 fpu_nyreg = 8; 511 else if (f == x86_AVX_STATE64) 512 fpu_nyreg = 16; 513 514 if (fpu_nyreg) { 515 x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; 516 bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG)); 517 } 518 519 iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32; 520 /* Sanitize XSAVE header */ 521 bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd)); 522 if (fpu_nyreg) 523 iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87); 524 else 525 iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87); 526 } else { 527 ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; 528 } 529 ifps->fp_valid = old_valid; 530 531 if (old_valid == FALSE) { 532 boolean_t istate = ml_set_interrupts_enabled(FALSE); 533 ifps->fp_valid = TRUE; 534 /* If altering the current thread's state, disable FPU */ 535 if (thr_act == current_thread()) 536 set_ts(); 537 538 ml_set_interrupts_enabled(istate); 539 } 540 541 simple_unlock(&pcb->lock); 542 543 if (new_ifps != 0) 544 fp_state_free(new_ifps); 545 } 546 return KERN_SUCCESS; 547} 548 549/* 550 * Get the floating-point state for a thread. 551 * If the thread is not the current thread, it is 552 * not running (held). Locking needed against 553 * concurrent fpu_set_state or fpu_get_state. 
554 */ 555kern_return_t 556fpu_get_fxstate( 557 thread_t thr_act, 558 thread_state_t tstate, 559 thread_flavor_t f) 560{ 561 struct x86_fx_thread_state *ifps; 562 x86_float_state64_t *state; 563 kern_return_t ret = KERN_FAILURE; 564 pcb_t pcb; 565 size_t state_size = sizeof(struct x86_fx_thread_state); 566 567 if (fp_kind == FP_NO) 568 return KERN_FAILURE; 569 570 if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) && 571 !ml_fpu_avx_enabled()) 572 return KERN_FAILURE; 573 574 state = (x86_float_state64_t *)tstate; 575 576 assert(thr_act != THREAD_NULL); 577 pcb = THREAD_TO_PCB(thr_act); 578 579 simple_lock(&pcb->lock); 580 581 ifps = pcb->ifps; 582 if (ifps == 0) { 583 /* 584 * No valid floating-point state. 585 */ 586 587 bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw, 588 state_size); 589 590 simple_unlock(&pcb->lock); 591 592 return KERN_SUCCESS; 593 } 594 /* 595 * Make sure we`ve got the latest fp state info 596 * If the live fpu state belongs to our target 597 */ 598 if (thr_act == current_thread()) { 599 boolean_t intr; 600 601 intr = ml_set_interrupts_enabled(FALSE); 602 603 clear_ts(); 604 fp_save(thr_act); 605 clear_fpu(); 606 607 (void)ml_set_interrupts_enabled(intr); 608 } 609 if (ifps->fp_valid) { 610 bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size); 611 if (fpu_YMM_present) { 612 struct x86_avx_thread_state *iavx = (void *) ifps; 613 uint32_t fpu_nyreg = 0; 614 615 if (f == x86_AVX_STATE32) 616 fpu_nyreg = 8; 617 else if (f == x86_AVX_STATE64) 618 fpu_nyreg = 16; 619 620 if (fpu_nyreg) { 621 x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; 622 bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG)); 623 } 624 } 625 626 ret = KERN_SUCCESS; 627 } 628 simple_unlock(&pcb->lock); 629 630 return ret; 631} 632 633 634 635/* 636 * the child thread is 'stopped' with the thread 637 * mutex held and is currently not known by anyone 638 * so no way for fpu state to get manipulated by an 639 * outside 
agency -> no need for pcb lock 640 */ 641 642void 643fpu_dup_fxstate( 644 thread_t parent, 645 thread_t child) 646{ 647 struct x86_fx_thread_state *new_ifps = NULL; 648 boolean_t intr; 649 pcb_t ppcb; 650 651 ppcb = THREAD_TO_PCB(parent); 652 653 if (ppcb->ifps == NULL) 654 return; 655 656 if (child->machine.ifps) 657 panic("fpu_dup_fxstate: child's ifps non-null"); 658 659 new_ifps = fp_state_alloc(); 660 661 simple_lock(&ppcb->lock); 662 663 if (ppcb->ifps != NULL) { 664 struct x86_fx_thread_state *ifps = ppcb->ifps; 665 /* 666 * Make sure we`ve got the latest fp state info 667 */ 668 intr = ml_set_interrupts_enabled(FALSE); 669 assert(current_thread() == parent); 670 clear_ts(); 671 fp_save(parent); 672 clear_fpu(); 673 674 (void)ml_set_interrupts_enabled(intr); 675 676 if (ifps->fp_valid) { 677 child->machine.ifps = new_ifps; 678 assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) || 679 (fp_register_state_size == sizeof(struct x86_avx_thread_state))); 680 bcopy((char *)(ppcb->ifps), 681 (char *)(child->machine.ifps), fp_register_state_size); 682 683 /* Mark the new fp saved state as non-live. */ 684 /* Temporarily disabled: radar 4647827 685 * new_ifps->fp_valid = TRUE; 686 */ 687 688 /* 689 * Clear any reserved bits in the MXCSR to prevent a GPF 690 * when issuing an FXRSTOR. 691 */ 692 new_ifps->fx_MXCSR &= mxcsr_capability_mask; 693 new_ifps = NULL; 694 } 695 } 696 simple_unlock(&ppcb->lock); 697 698 if (new_ifps != NULL) 699 fp_state_free(new_ifps); 700} 701 702 703/* 704 * Initialize FPU. 
705 * 706 */ 707 708void 709fpinit(void) 710{ 711 unsigned short control; 712 713 clear_ts(); 714 fninit(); 715 fnstcw(&control); 716 control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */ 717 control |= (FPC_PC_64 | /* Set precision */ 718 FPC_RC_RN | /* round-to-nearest */ 719 FPC_ZE | /* Suppress zero-divide */ 720 FPC_OE | /* and overflow */ 721 FPC_UE | /* underflow */ 722 FPC_IE | /* Allow NaNQs and +-INF */ 723 FPC_DE | /* Allow denorms as operands */ 724 FPC_PE); /* No trap for precision loss */ 725 fldcw(control); 726 727 /* Initialize SSE/SSE2 */ 728 __builtin_ia32_ldmxcsr(0x1f80); 729} 730 731/* 732 * Coprocessor not present. 733 */ 734 735void 736fpnoextflt(void) 737{ 738 boolean_t intr; 739 thread_t thr_act; 740 pcb_t pcb; 741 struct x86_fx_thread_state *ifps = 0; 742 743 thr_act = current_thread(); 744 pcb = THREAD_TO_PCB(thr_act); 745 746 assert(fp_register_state_size != 0); 747 748 if (pcb->ifps == 0 && !get_interrupt_level()) { 749 ifps = fp_state_alloc(); 750 bcopy((char *)&initial_fp_state, (char *)ifps, 751 fp_register_state_size); 752 if (!thread_is_64bit(thr_act)) { 753 ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32; 754 } 755 else 756 ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64; 757 ifps->fp_valid = TRUE; 758 } 759 intr = ml_set_interrupts_enabled(FALSE); 760 761 clear_ts(); /* Enable FPU use */ 762 763 if (__improbable(get_interrupt_level())) { 764 /* 765 * Save current coprocessor context if valid 766 * Initialize coprocessor live context 767 */ 768 fp_save(thr_act); 769 fpinit(); 770 } else { 771 if (pcb->ifps == 0) { 772 pcb->ifps = ifps; 773 ifps = 0; 774 } 775 /* 776 * Load this thread`s state into coprocessor live context. 777 */ 778 fp_load(thr_act); 779 } 780 (void)ml_set_interrupts_enabled(intr); 781 782 if (ifps) 783 fp_state_free(ifps); 784} 785 786/* 787 * FPU overran end of segment. 788 * Re-initialize FPU. Floating point state is not valid. 
789 */ 790 791void 792fpextovrflt(void) 793{ 794 thread_t thr_act = current_thread(); 795 pcb_t pcb; 796 struct x86_fx_thread_state *ifps; 797 boolean_t intr; 798 799 intr = ml_set_interrupts_enabled(FALSE); 800 801 if (get_interrupt_level()) 802 panic("FPU segment overrun exception at interrupt context\n"); 803 if (current_task() == kernel_task) 804 panic("FPU segment overrun exception in kernel thread context\n"); 805 806 /* 807 * This is a non-recoverable error. 808 * Invalidate the thread`s FPU state. 809 */ 810 pcb = THREAD_TO_PCB(thr_act); 811 simple_lock(&pcb->lock); 812 ifps = pcb->ifps; 813 pcb->ifps = 0; 814 simple_unlock(&pcb->lock); 815 816 /* 817 * Re-initialize the FPU. 818 */ 819 clear_ts(); 820 fninit(); 821 822 /* 823 * And disable access. 824 */ 825 clear_fpu(); 826 827 (void)ml_set_interrupts_enabled(intr); 828 829 if (ifps) 830 zfree(ifps_zone, ifps); 831 832 /* 833 * Raise exception. 834 */ 835 i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0); 836 /*NOTREACHED*/ 837} 838 839/* 840 * FPU error. Called by AST. 841 */ 842 843void 844fpexterrflt(void) 845{ 846 thread_t thr_act = current_thread(); 847 struct x86_fx_thread_state *ifps = thr_act->machine.ifps; 848 boolean_t intr; 849 850 intr = ml_set_interrupts_enabled(FALSE); 851 852 if (get_interrupt_level()) 853 panic("FPU error exception at interrupt context\n"); 854 if (current_task() == kernel_task) 855 panic("FPU error exception in kernel thread context\n"); 856 857 /* 858 * Save the FPU state and turn off the FPU. 859 */ 860 fp_save(thr_act); 861 862 (void)ml_set_interrupts_enabled(intr); 863 864 /* 865 * Raise FPU exception. 866 * Locking not needed on pcb->ifps, 867 * since thread is running. 868 */ 869 i386_exception(EXC_ARITHMETIC, 870 EXC_I386_EXTERR, 871 ifps->fx_status); 872 873 /*NOTREACHED*/ 874} 875 876/* 877 * Save FPU state. 878 * 879 * Locking not needed: 880 * . if called from fpu_get_state, pcb already locked. 881 * . 
if called from fpnoextflt or fp_intr, we are single-cpu 882 * . otherwise, thread is running. 883 * N.B.: Must be called with interrupts disabled 884 */ 885 886void 887fp_save( 888 thread_t thr_act) 889{ 890 pcb_t pcb = THREAD_TO_PCB(thr_act); 891 struct x86_fx_thread_state *ifps = pcb->ifps; 892 893 assert(ifps != 0); 894 if (ifps != 0 && !ifps->fp_valid) { 895 assert((get_cr0() & CR0_TS) == 0); 896 /* registers are in FPU */ 897 ifps->fp_valid = TRUE; 898 fpu_store_registers(ifps, thread_is_64bit(thr_act)); 899 } 900} 901 902/* 903 * Restore FPU state from PCB. 904 * 905 * Locking not needed; always called on the current thread. 906 */ 907 908void 909fp_load( 910 thread_t thr_act) 911{ 912 pcb_t pcb = THREAD_TO_PCB(thr_act); 913 struct x86_fx_thread_state *ifps = pcb->ifps; 914 915 assert(ifps); 916#if DEBUG 917 if (ifps->fp_valid != FALSE && ifps->fp_valid != TRUE) { 918 panic("fp_load() invalid fp_valid: %u, fp_save_layout: %u\n", 919 ifps->fp_valid, ifps->fp_save_layout); 920 } 921#endif 922 923 if (ifps->fp_valid == FALSE) { 924 fpinit(); 925 } else { 926 fpu_load_registers(ifps); 927 } 928 ifps->fp_valid = FALSE; /* in FPU */ 929} 930 931/* 932 * SSE arithmetic exception handling code. 933 * Basically the same as the x87 exception handler with a different subtype 934 */ 935 936void 937fpSSEexterrflt(void) 938{ 939 thread_t thr_act = current_thread(); 940 struct x86_fx_thread_state *ifps = thr_act->machine.ifps; 941 boolean_t intr; 942 943 intr = ml_set_interrupts_enabled(FALSE); 944 945 if (get_interrupt_level()) 946 panic("SSE exception at interrupt context\n"); 947 if (current_task() == kernel_task) 948 panic("SSE exception in kernel thread context\n"); 949 950 /* 951 * Save the FPU state and turn off the FPU. 952 */ 953 fp_save(thr_act); 954 955 (void)ml_set_interrupts_enabled(intr); 956 /* 957 * Raise FPU exception. 958 * Locking not needed on pcb->ifps, 959 * since thread is running. 
960 */ 961 962 i386_exception(EXC_ARITHMETIC, 963 EXC_I386_SSEEXTERR, 964 ifps->fx_MXCSR); 965 /*NOTREACHED*/ 966} 967 968void 969fp_setvalid(boolean_t value) { 970 thread_t thr_act = current_thread(); 971 struct x86_fx_thread_state *ifps = thr_act->machine.ifps; 972 973 if (ifps) { 974 ifps->fp_valid = value; 975 976 if (value == TRUE) { 977 boolean_t istate = ml_set_interrupts_enabled(FALSE); 978 clear_fpu(); 979 ml_set_interrupts_enabled(istate); 980 } 981 } 982} 983 984boolean_t 985ml_fpu_avx_enabled(void) { 986 return (fpu_YMM_present == TRUE); 987} 988