1/* 2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1992-1990 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58 59#include <platforms.h> 60 61#include <mach/exception_types.h> 62#include <mach/i386/thread_status.h> 63#include <mach/i386/fp_reg.h> 64#include <mach/branch_predicates.h> 65 66#include <kern/mach_param.h> 67#include <kern/processor.h> 68#include <kern/thread.h> 69#include <kern/zalloc.h> 70#include <kern/misc_protos.h> 71#include <kern/spl.h> 72#include <kern/assert.h> 73 74#include <libkern/OSAtomic.h> 75 76#include <architecture/i386/pio.h> 77#include <i386/cpuid.h> 78#include <i386/fpu.h> 79#include <i386/proc_reg.h> 80#include <i386/misc_protos.h> 81#include <i386/thread.h> 82#include <i386/trap.h> 83 84int fp_kind = FP_NO; /* not inited */ 85zone_t ifps_zone; /* zone for FPU save area */ 86 87#define ALIGNED(addr,size) (((uintptr_t)(addr)&((size)-1))==0) 88 89/* Forward */ 90 91extern void fpinit(void); 92extern void fp_save( 93 thread_t thr_act); 94extern void fp_load( 95 thread_t thr_act); 96 97static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps); 98 99struct x86_avx_thread_state initial_fp_state __attribute((aligned(64))); 100 101 102/* Global MXCSR capability bitmask */ 103static unsigned int mxcsr_capability_mask; 104 105#define fninit() \ 106 __asm__ volatile("fninit") 107 108#define fnstcw(control) \ 109 __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control))) 110 111#define fldcw(control) \ 112 __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) ) 113 114#define fnclex() \ 115 __asm__ volatile("fnclex") 116 117#define fnsave(state) \ 118 __asm__ volatile("fnsave %0" : "=m" (*state)) 119 120#define frstor(state) \ 121 __asm__ volatile("frstor %0" : : "m" (state)) 122 123#define fwait() \ 124 __asm__("fwait"); 125 126#define fxrstor(addr) __asm__ __volatile__("fxrstor %0" : : "m" (*(addr))) 127#define fxsave(addr) __asm__ __volatile__("fxsave %0" : "=m" (*(addr))) 128 129static uint32_t fp_register_state_size = 0; 130static uint32_t fpu_YMM_present = FALSE; 131static uint32_t cpuid_reevaluated = 0; 132 133static void fpu_store_registers(void *, boolean_t); 134static void fpu_load_registers(void *); 135 136extern void xsave64o(void); 137extern void xrstor64o(void); 138 139#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM)) 140 141/* DRK: TODO replace opcodes with mnemonics when assembler support available */ 142 143static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) { 144 __asm__ __volatile__(".short 0x010F\n\t.byte 0xD1" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0)); 145} 146 147static inline void xsave(void *a) { 148 /* MOD 0x4, operand ECX 0x1 */ 149 __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x21" :: "a"(XMASK), "d"(0), "c" (a)); 150} 151 152static inline void xrstor(void *a) { 153 /* MOD 0x5, operand ECX 0x1 */ 154 __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x29" :: "a"(XMASK), "d"(0), "c" (a)); 155} 156 157static inline void xsave64(void *a) { 158 /* Out of line call that executes in 64-bit mode on K32 */ 159 __asm__ __volatile__("call _xsave64o" :: "a"(XMASK), "d"(0), "c" (a)); 160} 161 162static inline void xrstor64(void *a) { 163 /* Out of line call that executes in 64-bit mode on K32 */ 164 __asm__ __volatile__("call _xrstor64o" :: "a"(XMASK), "d"(0), "c" (a)); 165} 166 167static inline unsigned short 168fnstsw(void) 169{ 170 unsigned short status; 171 __asm__ volatile("fnstsw %0" : "=ma" (status)); 172 return(status); 173} 174 175/* 176 * Configure the initial FPU state presented to new threads. 177 * Determine the MXCSR capability mask, which allows us to mask off any 178 * potentially unsafe "reserved" bits before restoring the FPU context. 179 * *Not* per-cpu, assumes symmetry. 180 */ 181 182static void 183configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps) 184{ 185 /* XSAVE requires a 64 byte aligned store */ 186 assert(ALIGNED(fps, 64)); 187 /* Clear, to prepare for the diagnostic FXSAVE */ 188 bzero(fps, sizeof(*fps)); 189 190 fpinit(); 191 fpu_store_registers(fps, FALSE); 192 193 mxcsr_capability_mask = fps->fx_MXCSR_MASK; 194 195 /* Set default mask value if necessary */ 196 if (mxcsr_capability_mask == 0) 197 mxcsr_capability_mask = 0xffbf; 198 199 /* Clear vector register store */ 200 bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg)); 201 bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg)); 202 203 fps->fp_valid = TRUE; 204 fps->fp_save_layout = fpu_YMM_present ? XSAVE32: FXSAVE32; 205 fpu_load_registers(fps); 206 207 /* Poison values to trap unsafe usage */ 208 fps->fp_valid = 0xFFFFFFFF; 209 fps->fp_save_layout = FP_UNUSED; 210 211 /* Re-enable FPU/SSE DNA exceptions */ 212 set_ts(); 213} 214 215 216/* 217 * Look for FPU and initialize it. 218 * Called on each CPU. 219 */ 220void 221init_fpu(void) 222{ 223#if DEBUG 224 unsigned short status; 225 unsigned short control; 226#endif 227 /* 228 * Check for FPU by initializing it, 229 * then trying to read the correct bit patterns from 230 * the control and status registers. 231 */ 232 set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */ 233 fninit(); 234#if DEBUG 235 status = fnstsw(); 236 fnstcw(&control); 237 238 assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f)); 239#endif 240 /* Advertise SSE support */ 241 if (cpuid_features() & CPUID_FEATURE_FXSR) { 242 fp_kind = FP_FXSR; 243 set_cr4(get_cr4() | CR4_OSFXS); 244 /* And allow SIMD exceptions if present */ 245 if (cpuid_features() & CPUID_FEATURE_SSE) { 246 set_cr4(get_cr4() | CR4_OSXMM); 247 } 248 fp_register_state_size = sizeof(struct x86_fx_thread_state); 249 250 } else 251 panic("fpu is not FP_FXSR"); 252 253 /* Configure the XSAVE context mechanism if the processor supports 254 * AVX/YMM registers 255 */ 256 if (cpuid_features() & CPUID_FEATURE_XSAVE) { 257 cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf; 258 if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) { 259 assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE); 260 /* XSAVE container size for all features */ 261 assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state)); 262 fp_register_state_size = sizeof(struct x86_avx_thread_state); 263 fpu_YMM_present = TRUE; 264 set_cr4(get_cr4() | CR4_OSXSAVE); 265 xsetbv(0, XMASK); 266 /* Re-evaluate CPUID, once, to reflect OSXSAVE */ 267 if (OSCompareAndSwap(0, 1, &cpuid_reevaluated)) 268 cpuid_set_info(); 269 /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */ 270 } 271 } 272 else 273 fpu_YMM_present = FALSE; 274 275 fpinit(); 276 277 /* 278 * Trap wait instructions. Turn off FPU for now. 279 */ 280 set_cr0(get_cr0() | CR0_TS | CR0_MP); 281} 282 283/* 284 * Allocate and initialize FP state for current thread. 285 * Don't load state. 286 */ 287static void * 288fp_state_alloc(void) 289{ 290 void *ifps = zalloc(ifps_zone); 291 292#if DEBUG 293 if (!(ALIGNED(ifps,64))) { 294 panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size); 295 } 296#endif 297 return ifps; 298} 299 300static inline void 301fp_state_free(void *ifps) 302{ 303 zfree(ifps_zone, ifps); 304} 305 306void clear_fpu(void) 307{ 308 set_ts(); 309} 310 311 312static void fpu_load_registers(void *fstate) { 313 struct x86_fx_thread_state *ifps = fstate; 314 fp_save_layout_t layout = ifps->fp_save_layout; 315 316 assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64); 317 assert(ALIGNED(ifps, 64)); 318 assert(ml_get_interrupts_enabled() == FALSE); 319 320#if DEBUG 321 if (layout == XSAVE32 || layout == XSAVE64) { 322 struct x86_avx_thread_state *iavx = fstate; 323 unsigned i; 324 /* Verify reserved bits in the XSAVE header*/ 325 if (iavx->_xh.xsbv & ~7) 326 panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv); 327 for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++) 328 if (iavx->_xh.xhrsvd[i]) 329 panic("Reserved bit set"); 330 } 331 if (fpu_YMM_present) { 332 if (layout != XSAVE32 && layout != XSAVE64) 333 panic("Inappropriate layout: %u\n", layout); 334 } 335#endif /* DEBUG */ 336 337#if defined(__i386__) 338 if (layout == FXSAVE32) { 339 /* Restore the compatibility/legacy mode XMM+x87 state */ 340 fxrstor(ifps); 341 } 342 else if (layout == FXSAVE64) { 343 fxrstor64(ifps); 344 } 345 else if (layout == XSAVE32) { 346 xrstor(ifps); 347 } 348 else if (layout == XSAVE64) { 349 xrstor64(ifps); 350 } 351#elif defined(__x86_64__) 352 if ((layout == XSAVE64) || (layout == XSAVE32)) 353 xrstor(ifps); 354 else 355 fxrstor(ifps); 356#endif 357} 358 359static void fpu_store_registers(void *fstate, boolean_t is64) { 360 struct x86_fx_thread_state *ifps = fstate; 361 assert(ALIGNED(ifps, 64)); 362#if defined(__i386__) 363 if (!is64) { 364 if (fpu_YMM_present) { 365 xsave(ifps); 366 ifps->fp_save_layout = XSAVE32; 367 } 368 else { 369 /* save the compatibility/legacy mode XMM+x87 state */ 370 fxsave(ifps); 371 ifps->fp_save_layout = FXSAVE32; 372 } 373 } 374 else { 375 if (fpu_YMM_present) { 376 xsave64(ifps); 377 ifps->fp_save_layout = XSAVE64; 378 } 379 else { 380 fxsave64(ifps); 381 ifps->fp_save_layout = FXSAVE64; 382 } 383 } 384#elif defined(__x86_64__) 385 if (fpu_YMM_present) { 386 xsave(ifps); 387 ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32; 388 } 389 else { 390 fxsave(ifps); 391 ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32; 392 } 393#endif 394} 395 396/* 397 * Initialize FP handling. 398 */ 399 400void 401fpu_module_init(void) 402{ 403 if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) && 404 (fp_register_state_size != sizeof(struct x86_avx_thread_state))) 405 panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size); 406 407 assert(fpu_YMM_present != 0xFFFFFFFF); 408 409 /* We explicitly choose an allocation size of 64 410 * to eliminate waste for the 832 byte sized 411 * AVX XSAVE register save area. 412 */ 413 ifps_zone = zinit(fp_register_state_size, 414 thread_max * fp_register_state_size, 415 64 * fp_register_state_size, 416 "x86 fpsave state"); 417 418 /* To maintain the required alignment, disable 419 * zone debugging for this zone as that appends 420 * 16 bytes to each element. 421 */ 422 zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE); 423 /* Determine MXCSR reserved bits and configure initial FPU state*/ 424 configure_mxcsr_capability_mask(&initial_fp_state); 425} 426 427/* 428 * Save thread`s FPU context. 429 */ 430void 431fpu_save_context(thread_t thread) 432{ 433 struct x86_fx_thread_state *ifps; 434 435 assert(ml_get_interrupts_enabled() == FALSE); 436 ifps = (thread)->machine.ifps; 437#if DEBUG 438 if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) { 439 panic("ifps->fp_valid: %u\n", ifps->fp_valid); 440 } 441#endif 442 if (ifps != 0 && (ifps->fp_valid == FALSE)) { 443 /* Clear CR0.TS in preparation for the FP context save. In 444 * theory, this shouldn't be necessary since a live FPU should 445 * indicate that TS is clear. However, various routines 446 * (such as sendsig & sigreturn) manipulate TS directly. 447 */ 448 clear_ts(); 449 /* registers are in FPU - save to memory */ 450 fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss))); 451 ifps->fp_valid = TRUE; 452 } 453 set_ts(); 454} 455 456 457/* 458 * Free a FPU save area. 459 * Called only when thread terminating - no locking necessary. 460 */ 461void 462fpu_free(void *fps) 463{ 464 fp_state_free(fps); 465} 466 467/* 468 * Set the floating-point state for a thread based 469 * on the FXSave formatted data. This is basically 470 * the same as fpu_set_state except it uses the 471 * expanded data structure. 472 * If the thread is not the current thread, it is 473 * not running (held). Locking needed against 474 * concurrent fpu_set_state or fpu_get_state. 475 */ 476kern_return_t 477fpu_set_fxstate( 478 thread_t thr_act, 479 thread_state_t tstate, 480 thread_flavor_t f) 481{ 482 struct x86_fx_thread_state *ifps; 483 struct x86_fx_thread_state *new_ifps; 484 x86_float_state64_t *state; 485 pcb_t pcb; 486 size_t state_size = sizeof(struct x86_fx_thread_state); 487 boolean_t old_valid; 488 if (fp_kind == FP_NO) 489 return KERN_FAILURE; 490 491 if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) && 492 !ml_fpu_avx_enabled()) 493 return KERN_FAILURE; 494 495 state = (x86_float_state64_t *)tstate; 496 497 assert(thr_act != THREAD_NULL); 498 pcb = THREAD_TO_PCB(thr_act); 499 500 if (state == NULL) { 501 /* 502 * new FPU state is 'invalid'. 503 * Deallocate the fp state if it exists. 504 */ 505 simple_lock(&pcb->lock); 506 507 ifps = pcb->ifps; 508 pcb->ifps = 0; 509 510 simple_unlock(&pcb->lock); 511 512 if (ifps != 0) 513 fp_state_free(ifps); 514 } else { 515 /* 516 * Valid state. Allocate the fp state if there is none. 517 */ 518 new_ifps = 0; 519 Retry: 520 simple_lock(&pcb->lock); 521 522 ifps = pcb->ifps; 523 if (ifps == 0) { 524 if (new_ifps == 0) { 525 simple_unlock(&pcb->lock); 526 new_ifps = fp_state_alloc(); 527 goto Retry; 528 } 529 ifps = new_ifps; 530 new_ifps = 0; 531 pcb->ifps = ifps; 532 } 533 /* 534 * now copy over the new data. 535 */ 536 old_valid = ifps->fp_valid; 537 538#if DEBUG 539 if ((old_valid == FALSE) && (thr_act != current_thread())) { 540 panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act); 541 } 542#endif 543 /* 544 * Clear any reserved bits in the MXCSR to prevent a GPF 545 * when issuing an FXRSTOR. 546 */ 547 548 state->fpu_mxcsr &= mxcsr_capability_mask; 549 550 bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size); 551 552 if (fpu_YMM_present) { 553 struct x86_avx_thread_state *iavx = (void *) ifps; 554 uint32_t fpu_nyreg = 0; 555 556 if (f == x86_AVX_STATE32) 557 fpu_nyreg = 8; 558 else if (f == x86_AVX_STATE64) 559 fpu_nyreg = 16; 560 561 if (fpu_nyreg) { 562 x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; 563 bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG)); 564 } 565 566 iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32; 567 /* Sanitize XSAVE header */ 568 bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd)); 569 if (fpu_nyreg) 570 iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87); 571 else 572 iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87); 573 } 574 else 575 ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; 576 ifps->fp_valid = old_valid; 577 578 if (old_valid == FALSE) { 579 boolean_t istate = ml_set_interrupts_enabled(FALSE); 580 ifps->fp_valid = TRUE; 581 set_ts(); 582 ml_set_interrupts_enabled(istate); 583 } 584 585 simple_unlock(&pcb->lock); 586 587 if (new_ifps != 0) 588 fp_state_free(new_ifps); 589 } 590 return KERN_SUCCESS; 591} 592 593/* 594 * Get the floating-point state for a thread. 595 * If the thread is not the current thread, it is 596 * not running (held). Locking needed against 597 * concurrent fpu_set_state or fpu_get_state. 598 */ 599kern_return_t 600fpu_get_fxstate( 601 thread_t thr_act, 602 thread_state_t tstate, 603 thread_flavor_t f) 604{ 605 struct x86_fx_thread_state *ifps; 606 x86_float_state64_t *state; 607 kern_return_t ret = KERN_FAILURE; 608 pcb_t pcb; 609 size_t state_size = sizeof(struct x86_fx_thread_state); 610 611 if (fp_kind == FP_NO) 612 return KERN_FAILURE; 613 614 if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) && 615 !ml_fpu_avx_enabled()) 616 return KERN_FAILURE; 617 618 state = (x86_float_state64_t *)tstate; 619 620 assert(thr_act != THREAD_NULL); 621 pcb = THREAD_TO_PCB(thr_act); 622 623 simple_lock(&pcb->lock); 624 625 ifps = pcb->ifps; 626 if (ifps == 0) { 627 /* 628 * No valid floating-point state. 629 */ 630 631 bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw, 632 state_size); 633 634 simple_unlock(&pcb->lock); 635 636 return KERN_SUCCESS; 637 } 638 /* 639 * Make sure we`ve got the latest fp state info 640 * If the live fpu state belongs to our target 641 */ 642 if (thr_act == current_thread()) { 643 boolean_t intr; 644 645 intr = ml_set_interrupts_enabled(FALSE); 646 647 clear_ts(); 648 fp_save(thr_act); 649 clear_fpu(); 650 651 (void)ml_set_interrupts_enabled(intr); 652 } 653 if (ifps->fp_valid) { 654 bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size); 655 if (fpu_YMM_present) { 656 struct x86_avx_thread_state *iavx = (void *) ifps; 657 uint32_t fpu_nyreg = 0; 658 659 if (f == x86_AVX_STATE32) 660 fpu_nyreg = 8; 661 else if (f == x86_AVX_STATE64) 662 fpu_nyreg = 16; 663 664 if (fpu_nyreg) { 665 x86_avx_state64_t *ystate = (x86_avx_state64_t *) state; 666 bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG)); 667 } 668 } 669 670 ret = KERN_SUCCESS; 671 } 672 simple_unlock(&pcb->lock); 673 674 return ret; 675} 676 677 678 679/* 680 * the child thread is 'stopped' with the thread 681 * mutex held and is currently not known by anyone 682 * so no way for fpu state to get manipulated by an 683 * outside agency -> no need for pcb lock 684 */ 685 686void 687fpu_dup_fxstate( 688 thread_t parent, 689 thread_t child) 690{ 691 struct x86_fx_thread_state *new_ifps = NULL; 692 boolean_t intr; 693 pcb_t ppcb; 694 695 ppcb = THREAD_TO_PCB(parent); 696 697 if (ppcb->ifps == NULL) 698 return; 699 700 if (child->machine.ifps) 701 panic("fpu_dup_fxstate: child's ifps non-null"); 702 703 new_ifps = fp_state_alloc(); 704 705 simple_lock(&ppcb->lock); 706 707 if (ppcb->ifps != NULL) { 708 struct x86_fx_thread_state *ifps = ppcb->ifps; 709 /* 710 * Make sure we`ve got the latest fp state info 711 */ 712 intr = ml_set_interrupts_enabled(FALSE); 713 assert(current_thread() == parent); 714 clear_ts(); 715 fp_save(parent); 716 clear_fpu(); 717 718 (void)ml_set_interrupts_enabled(intr); 719 720 if (ifps->fp_valid) { 721 child->machine.ifps = new_ifps; 722 assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) || 723 (fp_register_state_size == sizeof(struct x86_avx_thread_state))); 724 bcopy((char *)(ppcb->ifps), 725 (char *)(child->machine.ifps), fp_register_state_size); 726 727 /* Mark the new fp saved state as non-live. */ 728 /* Temporarily disabled: radar 4647827 729 * new_ifps->fp_valid = TRUE; 730 */ 731 732 /* 733 * Clear any reserved bits in the MXCSR to prevent a GPF 734 * when issuing an FXRSTOR. 735 */ 736 new_ifps->fx_MXCSR &= mxcsr_capability_mask; 737 new_ifps = NULL; 738 } 739 } 740 simple_unlock(&ppcb->lock); 741 742 if (new_ifps != NULL) 743 fp_state_free(new_ifps); 744} 745 746 747/* 748 * Initialize FPU. 749 * 750 */ 751 752void 753fpinit(void) 754{ 755 unsigned short control; 756 757 clear_ts(); 758 fninit(); 759 fnstcw(&control); 760 control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */ 761 control |= (FPC_PC_64 | /* Set precision */ 762 FPC_RC_RN | /* round-to-nearest */ 763 FPC_ZE | /* Suppress zero-divide */ 764 FPC_OE | /* and overflow */ 765 FPC_UE | /* underflow */ 766 FPC_IE | /* Allow NaNQs and +-INF */ 767 FPC_DE | /* Allow denorms as operands */ 768 FPC_PE); /* No trap for precision loss */ 769 fldcw(control); 770 771 /* Initialize SSE/SSE2 */ 772 __builtin_ia32_ldmxcsr(0x1f80); 773} 774 775/* 776 * Coprocessor not present. 777 */ 778 779void 780fpnoextflt(void) 781{ 782 boolean_t intr; 783 thread_t thr_act; 784 pcb_t pcb; 785 struct x86_fx_thread_state *ifps = 0; 786 787 thr_act = current_thread(); 788 pcb = THREAD_TO_PCB(thr_act); 789 790 assert(fp_register_state_size != 0); 791 792 if (pcb->ifps == 0 && !get_interrupt_level()) { 793 ifps = fp_state_alloc(); 794 bcopy((char *)&initial_fp_state, (char *)ifps, 795 fp_register_state_size); 796 if (!thread_is_64bit(thr_act)) { 797 ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32; 798 } 799 else 800 ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64; 801 ifps->fp_valid = TRUE; 802 } 803 intr = ml_set_interrupts_enabled(FALSE); 804 805 clear_ts(); /* Enable FPU use */ 806 807 if (__improbable(get_interrupt_level())) { 808 /* 809 * Save current coprocessor context if valid 810 * Initialize coprocessor live context 811 */ 812 fp_save(thr_act); 813 fpinit(); 814 } else { 815 if (pcb->ifps == 0) { 816 pcb->ifps = ifps; 817 ifps = 0; 818 } 819 /* 820 * Load this thread`s state into coprocessor live context. 821 */ 822 fp_load(thr_act); 823 } 824 (void)ml_set_interrupts_enabled(intr); 825 826 if (ifps) 827 fp_state_free(ifps); 828} 829 830/* 831 * FPU overran end of segment. 832 * Re-initialize FPU. Floating point state is not valid. 833 */ 834 835void 836fpextovrflt(void) 837{ 838 thread_t thr_act = current_thread(); 839 pcb_t pcb; 840 struct x86_fx_thread_state *ifps; 841 boolean_t intr; 842 843 intr = ml_set_interrupts_enabled(FALSE); 844 845 if (get_interrupt_level()) 846 panic("FPU segment overrun exception at interrupt context\n"); 847 if (current_task() == kernel_task) 848 panic("FPU segment overrun exception in kernel thread context\n"); 849 850 /* 851 * This is a non-recoverable error. 852 * Invalidate the thread`s FPU state. 853 */ 854 pcb = THREAD_TO_PCB(thr_act); 855 simple_lock(&pcb->lock); 856 ifps = pcb->ifps; 857 pcb->ifps = 0; 858 simple_unlock(&pcb->lock); 859 860 /* 861 * Re-initialize the FPU. 862 */ 863 clear_ts(); 864 fninit(); 865 866 /* 867 * And disable access. 868 */ 869 clear_fpu(); 870 871 (void)ml_set_interrupts_enabled(intr); 872 873 if (ifps) 874 zfree(ifps_zone, ifps); 875 876 /* 877 * Raise exception. 878 */ 879 i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0); 880 /*NOTREACHED*/ 881} 882 883/* 884 * FPU error. Called by AST. 885 */ 886 887void 888fpexterrflt(void) 889{ 890 thread_t thr_act = current_thread(); 891 struct x86_fx_thread_state *ifps = thr_act->machine.ifps; 892 boolean_t intr; 893 894 intr = ml_set_interrupts_enabled(FALSE); 895 896 if (get_interrupt_level()) 897 panic("FPU error exception at interrupt context\n"); 898 if (current_task() == kernel_task) 899 panic("FPU error exception in kernel thread context\n"); 900 901 /* 902 * Save the FPU state and turn off the FPU. 903 */ 904 fp_save(thr_act); 905 906 (void)ml_set_interrupts_enabled(intr); 907 908 /* 909 * Raise FPU exception. 910 * Locking not needed on pcb->ifps, 911 * since thread is running. 912 */ 913 i386_exception(EXC_ARITHMETIC, 914 EXC_I386_EXTERR, 915 ifps->fx_status); 916 917 /*NOTREACHED*/ 918} 919 920/* 921 * Save FPU state. 922 * 923 * Locking not needed: 924 * . if called from fpu_get_state, pcb already locked. 925 * . if called from fpnoextflt or fp_intr, we are single-cpu 926 * . otherwise, thread is running. 927 * N.B.: Must be called with interrupts disabled 928 */ 929 930void 931fp_save( 932 thread_t thr_act) 933{ 934 pcb_t pcb = THREAD_TO_PCB(thr_act); 935 struct x86_fx_thread_state *ifps = pcb->ifps; 936 937 assert(ifps != 0); 938 if (ifps != 0 && !ifps->fp_valid) { 939 assert((get_cr0() & CR0_TS) == 0); 940 /* registers are in FPU */ 941 ifps->fp_valid = TRUE; 942 fpu_store_registers(ifps, thread_is_64bit(thr_act)); 943 } 944} 945 946/* 947 * Restore FPU state from PCB. 948 * 949 * Locking not needed; always called on the current thread. 950 */ 951 952void 953fp_load( 954 thread_t thr_act) 955{ 956 pcb_t pcb = THREAD_TO_PCB(thr_act); 957 struct x86_fx_thread_state *ifps = pcb->ifps; 958 959 assert(ifps); 960 assert(ifps->fp_valid == FALSE || ifps->fp_valid == TRUE); 961 962 if (ifps->fp_valid == FALSE) { 963 fpinit(); 964 } else { 965 fpu_load_registers(ifps); 966 } 967 ifps->fp_valid = FALSE; /* in FPU */ 968} 969 970/* 971 * SSE arithmetic exception handling code. 972 * Basically the same as the x87 exception handler with a different subtype 973 */ 974 975void 976fpSSEexterrflt(void) 977{ 978 thread_t thr_act = current_thread(); 979 struct x86_fx_thread_state *ifps = thr_act->machine.ifps; 980 boolean_t intr; 981 982 intr = ml_set_interrupts_enabled(FALSE); 983 984 if (get_interrupt_level()) 985 panic("SSE exception at interrupt context\n"); 986 if (current_task() == kernel_task) 987 panic("SSE exception in kernel thread context\n"); 988 989 /* 990 * Save the FPU state and turn off the FPU. 991 */ 992 fp_save(thr_act); 993 994 (void)ml_set_interrupts_enabled(intr); 995 /* 996 * Raise FPU exception. 997 * Locking not needed on pcb->ifps, 998 * since thread is running. 999 */ 1000 assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); 1001 i386_exception(EXC_ARITHMETIC, 1002 EXC_I386_SSEEXTERR, 1003 ifps->fx_MXCSR); 1004 /*NOTREACHED*/ 1005} 1006 1007void 1008fp_setvalid(boolean_t value) { 1009 thread_t thr_act = current_thread(); 1010 struct x86_fx_thread_state *ifps = thr_act->machine.ifps; 1011 1012 if (ifps) { 1013 ifps->fp_valid = value; 1014 1015 if (value == TRUE) { 1016 boolean_t istate = ml_set_interrupts_enabled(FALSE); 1017 clear_fpu(); 1018 ml_set_interrupts_enabled(istate); 1019 } 1020 } 1021} 1022 1023boolean_t 1024ml_fpu_avx_enabled(void) { 1025 return (fpu_YMM_present == TRUE); 1026} 1027