1/*	$OpenBSD: fpu.h,v 1.20 2024/04/14 09:59:04 kettenis Exp $	*/
2/*	$NetBSD: fpu.h,v 1.1 2003/04/26 18:39:40 fvdl Exp $	*/
3
4#ifndef	_MACHINE_FPU_H_
5#define	_MACHINE_FPU_H_
6
7#include <sys/types.h>
8
/*
 * If the CPU supports xsave/xrstor then we use them so that we can provide
 * AVX support.  Otherwise we require fxsave/fxrstor, as the SSE registers
 * are part of the ABI for passing floating point values.
 * While fxsave/fxrstor only require 16-byte alignment for the save area,
 * xsave/xrstor require the save area to have 64-byte alignment.
 */
16
/*
 * Layout of the fxsave/fxrstor save area (the 64-bit form, going by the
 * struct name).  The members add up to exactly 512 bytes and are already
 * naturally aligned; __packed guarantees that no padding is ever added.
 * The byte offset of each member is noted on the right.
 */
struct fxsave64 {
	uint16_t	fx_fcw;			/* offset   0 */
	uint16_t	fx_fsw;			/* offset   2 */
	uint8_t		fx_ftw;			/* offset   4 */
	uint8_t		fx_unused1;		/* offset   5 */
	uint16_t	fx_fop;			/* offset   6 */
	uint64_t	fx_rip;			/* offset   8 */
	uint64_t	fx_rdp;			/* offset  16 */
	uint32_t	fx_mxcsr;		/* offset  24 */
	uint32_t	fx_mxcsr_mask;		/* offset  28 */
	uint64_t	fx_st[8][2];		/* offset  32: 8 normal FP regs */
	uint64_t	fx_xmm[16][2];		/* offset 160: 16 SSE2 registers */
	uint8_t		fx_unused3[96];		/* offset 416: pads to 512 bytes */
} __packed;
31
/*
 * Header placed directly after the 512-byte fxsave image inside
 * struct savefpu.  It consists of two 64-bit words followed by 48 bytes
 * split into two arrays (8 + 40; reserved, judging by their names), for
 * a total of 64 bytes.
 */
struct xstate_hdr {
	uint64_t xstate_bv;		/* offset  0 */
	uint64_t xstate_xcomp_bv;	/* offset  8 */
	uint8_t  xstate_rsrv0[8];	/* offset 16 */
	uint8_t  xstate_rsrv[40];	/* offset 24 */
} __packed;
38
39struct savefpu {
40	struct fxsave64 fp_fxsave;	/* see above */
41	struct xstate_hdr fp_xstate;
42	u_int64_t fp_ymm[16][2];
43	u_int8_t fp_components[1856];	/* enough for AVX-512 */
44};
45
/*
 * Initial control register values.
 *
 * The x87 control word matches what the i387 itself defaults to: Intel
 * extended precision mode and round to nearest, with all exceptions
 * masked.
 *
 * Bit-level view of the MXCSR constants: 0x1f80 has exactly bits 7-12
 * set, and 0xffbf is the low 16 bits with bit 6 cleared.
 */
#define	__INITIAL_NPXCW__		0x037f
#define	__INITIAL_MXCSR__		0x1f80
#define	__INITIAL_MXCSR_MASK__		0xffbf
53
54#ifdef _KERNEL
55/*
56 * XXX
57 */
/* Only used through pointers here, so forward declarations are enough. */
struct cpu_info;
struct trapframe;

/* Kernel globals defined outside this header. */
extern size_t		fpu_save_len;
extern uint32_t		fpu_mxcsr_mask;
extern uint64_t		xsave_mask;	/* mask that fpureset() passes on */
extern int		cpu_use_xsaves;

/* FPU handling routines implemented outside this header. */
void	fpuinit(struct cpu_info *);
int	fputrap(int _type);
void	fpusave(struct savefpu *);
void	fpusavereset(struct savefpu *);
void	fpu_kernel_enter(void);
void	fpu_kernel_exit(void);
72
/*
 * Pointer to fxsave/xsave/xsaves data with everything reset.  It is the
 * address of the savefpu area in proc0's pcb.
 */
#define	fpu_cleandata	(&proc0.p_addr->u_pcb.pcb_savefpu)

/*
 * Routines implemented outside this header.  Each takes a 64-bit mask;
 * xrstor_user() and xsetbv_user() return an int, xrstor_kern() returns
 * nothing.
 */
int	xrstor_user(struct savefpu *_addr, uint64_t _mask);
void	xrstor_kern(struct savefpu *_addr, uint64_t _mask);
int	xsetbv_user(uint32_t _reg, uint64_t _mask);

/* Hand the clean save area and xsave_mask to xrstor_kern(). */
#define	fpureset()	xrstor_kern(fpu_cleandata, xsave_mask)
81
/*
 * One-instruction wrappers.  fninit() and fwait() take no operand.  The
 * other three take a pointer and use the pointed-to object as the
 * instruction's memory operand: fxsave() writes it, ldmxcsr() and fldcw()
 * read it.  The argument is parenthesized before it is dereferenced so
 * that a pointer expression such as `p + 1' is dereferenced as a whole
 * rather than being parsed as `*p + 1'.
 */
#define fninit()		__asm("fninit")
#define fwait()			__asm("fwait")
/* should be fxsave64, but where we use this it doesn't matter */
#define fxsave(addr)		__asm("fxsave %0" : "=m" (*(addr)))
#define ldmxcsr(addr)		__asm("ldmxcsr %0" : : "m" (*(addr)))
#define fldcw(addr)		__asm("fldcw %0" : : "m" (*(addr)))
88
/*
 * Execute xsave64 with *addr as its memory operand.  The 64-bit mask is
 * split in two: the low 32 bits go in the "a" register operand and the
 * high 32 bits in the "d" register operand.  *addr is declared as both
 * read and written ("+m").
 */
static inline void
xsave(struct savefpu *addr, uint64_t mask)
{
	const uint32_t mask_hi = (uint32_t)(mask >> 32);
	const uint32_t mask_lo = (uint32_t)mask;

	__asm volatile("xsave64 %0"
	    : "+m" (*addr)
	    : "a" (mask_lo), "d" (mask_hi));
}
98
/*
 * Execute xrstors64 with *addr as its memory operand.  The 64-bit mask is
 * split in two: the low 32 bits go in the "a" register operand and the
 * high 32 bits in the "d" register operand.  *addr is an input only
 * ("m"), matching the const qualifier on the parameter.
 */
static inline void
xrstors(const struct savefpu *addr, uint64_t mask)
{
	const uint32_t mask_hi = (uint32_t)(mask >> 32);
	const uint32_t mask_lo = (uint32_t)mask;

	__asm volatile("xrstors64 %0"
	    :
	    : "m" (*addr), "a" (mask_lo), "d" (mask_hi));
}
108
109#endif
110
111#endif /* _MACHINE_FPU_H_ */
112