/*
 * Copyright (C) 1994 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * General FPU state handling cleanups
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 * x86-64 work by Andi Kleen 2002
 */

#ifndef _ASM_X86_I387_H
#define _ASM_X86_I387_H

#ifndef __ASSEMBLY__

#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/regset.h>
#include <linux/hardirq.h>
#include <linux/slab.h>
#include <asm/asm.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/sigcontext.h>
#include <asm/user.h>
#include <asm/uaccess.h>
#include <asm/xsave.h>

extern unsigned int sig_xstate_size;
extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child);
extern asmlinkage void math_state_restore(void);
extern void __math_state_restore(void);
extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);

extern user_regset_active_fn fpregs_active, xfpregs_active;
extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
			  xstateregs_get;
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
			  xstateregs_set;

/*
 * xstateregs_active == fpregs_active. Please refer to the comment
 * at the definition of fpregs_active.
 */
#define xstateregs_active	fpregs_active

extern struct _fpx_sw_bytes fx_sw_reserved;
#ifdef CONFIG_IA32_EMULATION
extern unsigned int sig_xstate_ia32_size;
extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
struct _fpstate_ia32;
struct _xstate_ia32;
extern int save_i387_xstate_ia32(void __user *buf);
extern int restore_i387_xstate_ia32(void __user *buf);
#endif

#define X87_FSW_ES	(1 << 7)	/* Exception Summary */

static __always_inline __pure bool use_xsaveopt(void)
{
	return static_cpu_has(X86_FEATURE_XSAVEOPT);
}

static __always_inline __pure bool use_xsave(void)
{
	return static_cpu_has(X86_FEATURE_XSAVE);
}

extern void __sanitize_i387_state(struct task_struct *);

static inline void sanitize_i387_state(struct task_struct *tsk)
{
	if (!use_xsaveopt())
		return;
	__sanitize_i387_state(tsk);
}

#ifdef CONFIG_X86_64

/* Ignore delayed exceptions from user space */
static inline void tolerant_fwait(void)
{
	asm volatile("1:	fwait\n"
		     "2:\n"
		     _ASM_EXTABLE(1b, 2b));
}

static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
	int err;

	asm volatile("1:	rex64/fxrstor (%[fx])\n\t"
		     "2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3:	movl $-1,%[err]\n"
		     "	jmp	2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : [err] "=r" (err)
		     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
	return err;
}
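/*
 * Usage sketch (illustrative, not part of the original header): the
 * "checking" restore is paranoid against a bad or unmapped state
 * image; the .fixup code above turns the fault into a -1 return.
 * A caller such as math_state_restore() handles it roughly like:
 *
 *	if (unlikely(restore_fpu_checking(tsk))) {
 *		stts();
 *		force_sig(SIGSEGV, tsk);	// corrupt FP state image
 *		return;
 *	}
 */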
/*
 * AMD CPUs don't save/restore FDP/FIP/FOP unless an exception is
 * pending.  Clear the x87 state here by setting it to fixed values.
 * The kernel data segment can sometimes be 0 and sometimes the new
 * user value.  Both should be OK.  Use the PDA as the safe address
 * because it should already be in L1.
 */
static inline void fpu_clear(struct fpu *fpu)
{
	struct xsave_struct *xstate = &fpu->state->xsave;
	struct i387_fxsave_struct *fx = &fpu->state->fxsave;

	/*
	 * xsave header may indicate the init state of the FP.
	 */
	if (use_xsave() &&
	    !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
		return;

	if (unlikely(fx->swd & X87_FSW_ES))
		asm volatile("fnclex");
	alternative_input(ASM_NOP8 ASM_NOP2,
			  "emms\n"		/* clear stack tags */
			  "fildl %%gs:0",	/* load to clear state */
			  X86_FEATURE_FXSAVE_LEAK);
}

static inline void clear_fpu_state(struct task_struct *tsk)
{
	fpu_clear(&tsk->thread.fpu);
}

static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
{
	int err;

	/*
	 * Clear the bytes not touched by the fxsave and reserved
	 * for SW usage.
	 */
	err = __clear_user(&fx->sw_reserved,
			   sizeof(struct _fpx_sw_bytes));
	if (unlikely(err))
		return -EFAULT;

	asm volatile("1:	rex64/fxsave (%[fx])\n\t"
		     "2:\n"
		     ".section .fixup,\"ax\"\n"
		     "3:	movl $-1,%[err]\n"
		     "	jmp	2b\n"
		     ".previous\n"
		     _ASM_EXTABLE(1b, 3b)
		     : [err] "=r" (err), "=m" (*fx)
		     : [fx] "cdaSDb" (fx), "0" (0));
	if (unlikely(err) &&
	    __clear_user(fx, sizeof(struct i387_fxsave_struct)))
		err = -EFAULT;
	/* No need to clear here because the caller clears USED_MATH */
	return err;
}

static inline void fpu_fxsave(struct fpu *fpu)
{
	/*
	 * Using "rex64; fxsave %0" is broken because, if the memory
	 * operand uses any extended registers for addressing, a second
	 * REX prefix will be generated (to the assembler, rex64
	 * followed by a semicolon is a separate instruction), and
	 * hence the 64-bitness is lost.
	 */
	__asm__ __volatile__("rex64/fxsave (%1)"
			     : "=m" (fpu->state->fxsave)
			     : "cdaSDb" (&fpu->state->fxsave));
}

static inline void fpu_save_init(struct fpu *fpu)
{
	if (use_xsave())
		fpu_xsave(fpu);
	else
		fpu_fxsave(fpu);

	fpu_clear(fpu);
}

static inline void __save_init_fpu(struct task_struct *tsk)
{
	fpu_save_init(&tsk->thread.fpu);
	task_thread_info(tsk)->status &= ~TS_USEDFPU;
}
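/*
 * Usage sketch (illustrative, not part of the original header): at
 * context-switch time the scheduler saves the outgoing task's state
 * lazily, roughly:
 *
 *	__unlazy_fpu(prev);	// fxsave/xsave + stts() if prev used the FPU
 *
 * The incoming task then traps with #NM on its first FPU instruction
 * and math_state_restore() reloads its saved state.  __unlazy_fpu()
 * is defined later in this file for both 32- and 64-bit.
 */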
#else	/* CONFIG_X86_32 */

#ifdef CONFIG_MATH_EMULATION
extern void finit_soft_fpu(struct i387_soft_struct *soft);
#else
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif

static inline void tolerant_fwait(void)
{
	asm volatile("fnclex ; fwait");
}

/* perform fxrstor iff the processor has extended states, otherwise frstor */
static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{
	/*
	 * The "nop" is needed to make the instructions the same
	 * length.
	 */
	alternative_input(
		"nop ; frstor %1",
		"fxrstor %1",
		X86_FEATURE_FXSR,
		"m" (*fx));

	return 0;
}

/*
 * We need a safe address that is cheap to find and that is already in
 * L1 during context switch.  The best choices are unfortunately
 * different for UP and SMP.
 */
#ifdef CONFIG_SMP
#define safe_address (__per_cpu_offset[0])
#else
#define safe_address (kstat_cpu(0).cpustat.user)
#endif

/*
 * These must be called with preempt disabled
 */
static inline void fpu_save_init(struct fpu *fpu)
{
	if (use_xsave()) {
		struct xsave_struct *xstate = &fpu->state->xsave;
		struct i387_fxsave_struct *fx = &fpu->state->fxsave;

		fpu_xsave(fpu);

		/*
		 * xsave header may indicate the init state of the FP.
		 */
		if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
			goto end;

		if (unlikely(fx->swd & X87_FSW_ES))
			asm volatile("fnclex");

		/*
		 * we can do a simple return here or be paranoid :)
		 */
		goto clear_state;
	}

	/*
	 * Use more nops than strictly needed in case the compiler
	 * varies code.
	 */
	alternative_input(
		"fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4,
		"fxsave %[fx]\n"
		"bt $7,%[fsw] ; jnc 1f ; fnclex\n1:",
		X86_FEATURE_FXSR,
		[fx] "m" (fpu->state->fxsave),
		[fsw] "m" (fpu->state->fxsave.swd) : "memory");
clear_state:
	/*
	 * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an
	 * exception is pending.  Clear the x87 state here by setting
	 * it to fixed values.  safe_address is a random variable that
	 * should be in L1.
	 */
	alternative_input(
		GENERIC_NOP8 GENERIC_NOP2,
		"emms\n\t"		/* clear stack tags */
		"fildl %[addr]",	/* set F?P to defined value */
		X86_FEATURE_FXSAVE_LEAK,
		[addr] "m" (safe_address));
end:
	;
}

static inline void __save_init_fpu(struct task_struct *tsk)
{
	fpu_save_init(&tsk->thread.fpu);
	task_thread_info(tsk)->status &= ~TS_USEDFPU;
}

#endif	/* CONFIG_X86_64 */

static inline int fpu_fxrstor_checking(struct fpu *fpu)
{
	return fxrstor_checking(&fpu->state->fxsave);
}

static inline int fpu_restore_checking(struct fpu *fpu)
{
	if (use_xsave())
		return fpu_xrstor_checking(fpu);
	else
		return fpu_fxrstor_checking(fpu);
}

static inline int restore_fpu_checking(struct task_struct *tsk)
{
	return fpu_restore_checking(&tsk->thread.fpu);
}

/*
 * Signal frame handlers...
 */
extern int save_i387_xstate(void __user *buf);
extern int restore_i387_xstate(void __user *buf);

static inline void __unlazy_fpu(struct task_struct *tsk)
{
	if (task_thread_info(tsk)->status & TS_USEDFPU) {
		__save_init_fpu(tsk);
		stts();
	} else
		tsk->fpu_counter = 0;
}

static inline void __clear_fpu(struct task_struct *tsk)
{
	if (task_thread_info(tsk)->status & TS_USEDFPU) {
		tolerant_fwait();
		task_thread_info(tsk)->status &= ~TS_USEDFPU;
		stts();
	}
}

static inline void kernel_fpu_begin(void)
{
	struct thread_info *me = current_thread_info();

	preempt_disable();
	if (me->status & TS_USEDFPU)
		__save_init_fpu(me->task);
	else
		clts();
}

static inline void kernel_fpu_end(void)
{
	stts();
	preempt_enable();
}

static inline bool irq_fpu_usable(void)
{
	struct pt_regs *regs;

	return !in_interrupt() || !(regs = get_irq_regs()) ||
		user_mode(regs) || (read_cr0() & X86_CR0_TS);
}

/*
 * Some instructions, such as VIA's padlock instructions, generate a
 * spurious DNA fault but don't modify SSE registers.  They also get
 * used from interrupt context.  To keep them from interacting wrongly
 * with other user/kernel FPU usage, they should only be used bracketed
 * by irq_ts_save()/irq_ts_restore().
 */
static inline int irq_ts_save(void)
{
	/*
	 * If in process context and not atomic, we can take a spurious
	 * DNA fault.  Otherwise, doing clts() in process context
	 * requires disabling preemption or some heavy lifting like
	 * kernel_fpu_begin().
	 */
	if (!in_atomic())
		return 0;

	if (read_cr0() & X86_CR0_TS) {
		clts();
		return 1;
	}

	return 0;
}

static inline void irq_ts_restore(int TS_state)
{
	if (TS_state)
		stts();
}
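/*
 * Usage sketches (illustrative, not part of the original header):
 *
 * Kernel code that wants to use FPU/SSE instructions brackets them
 * with kernel_fpu_begin()/kernel_fpu_end() and must not sleep in
 * between, since preemption stays disabled:
 *
 *	kernel_fpu_begin();
 *	// ... SSE-optimized copy, checksum, crypto, etc. ...
 *	kernel_fpu_end();
 *
 * Spurious-DNA instructions such as the VIA padlock ones are instead
 * wrapped with irq_ts_save()/irq_ts_restore(), e.g. as the padlock
 * crypto drivers do:
 *
 *	int ts_state = irq_ts_save();
 *	// ... padlock instructions ...
 *	irq_ts_restore(ts_state);
 */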
#ifdef CONFIG_X86_64

static inline void save_init_fpu(struct task_struct *tsk)
{
	__save_init_fpu(tsk);
	stts();
}

#define unlazy_fpu	__unlazy_fpu
#define clear_fpu	__clear_fpu

#else	/* CONFIG_X86_32 */

/*
 * These disable preemption on their own and are safe
 */
static inline void save_init_fpu(struct task_struct *tsk)
{
	preempt_disable();
	__save_init_fpu(tsk);
	stts();
	preempt_enable();
}

static inline void unlazy_fpu(struct task_struct *tsk)
{
	preempt_disable();
	__unlazy_fpu(tsk);
	preempt_enable();
}

static inline void clear_fpu(struct task_struct *tsk)
{
	preempt_disable();
	__clear_fpu(tsk);
	preempt_enable();
}

#endif	/* CONFIG_X86_64 */

/*
 * i387 state interaction
 */
static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
{
	if (cpu_has_fxsr) {
		return tsk->thread.fpu.state->fxsave.cwd;
	} else {
		return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
	}
}

static inline unsigned short get_fpu_swd(struct task_struct *tsk)
{
	if (cpu_has_fxsr) {
		return tsk->thread.fpu.state->fxsave.swd;
	} else {
		return (unsigned short)tsk->thread.fpu.state->fsave.swd;
	}
}

static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
{
	if (cpu_has_xmm) {
		return tsk->thread.fpu.state->fxsave.mxcsr;
	} else {
		return MXCSR_DEFAULT;
	}
}

static inline bool fpu_allocated(struct fpu *fpu)
{
	return fpu->state != NULL;
}

static inline int fpu_alloc(struct fpu *fpu)
{
	if (fpu_allocated(fpu))
		return 0;
	fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
	if (!fpu->state)
		return -ENOMEM;
	WARN_ON((unsigned long)fpu->state & 15);
	return 0;
}

static inline void fpu_free(struct fpu *fpu)
{
	if (fpu->state) {
		kmem_cache_free(task_xstate_cachep, fpu->state);
		fpu->state = NULL;
	}
}

static inline void fpu_copy(struct fpu *dst, struct fpu *src)
{
	memcpy(dst->state, src->state, xstate_size);
}

extern void fpu_finit(struct fpu *fpu);

#endif	/* __ASSEMBLY__ */

/* Byte-encoded "pshufb %xmm5, %xmmN" for assemblers without SSSE3 support */
#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5
#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5

#endif	/* _ASM_X86_I387_H */
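/*
 * Usage sketch for the PSHUFB_* macros above (illustrative, not part
 * of the original header): a .S file built with an assembler that
 * lacks SSSE3 support can use the byte-encoded form in place of the
 * mnemonic:
 *
 *	PSHUFB_XMM5_XMM0	# same as: pshufb %xmm5, %xmm0
 */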