/* SPDX-License-Identifier: GPL-2.0 */
/*
 * In-kernel FPU support functions
 *
 * Consider these guidelines before using in-kernel FPU functions:
 *
 *  1. Use kernel_fpu_begin() and kernel_fpu_end() to enclose all in-kernel
 *     use of floating-point or vector registers and instructions.
 *
 *  2. For kernel_fpu_begin(), specify the vector register range you want to
 *     use with the KERNEL_VXR_* constants. Consider these usage guidelines:
 *
 *     a) If your function typically runs in process context, use the lower
 *	  half of the vector registers, for example, specify KERNEL_VXR_LOW.
 *     b) If your function typically runs in soft-irq or hard-irq context,
 *	  prefer using the upper half of the vector registers, for example,
 *	  specify KERNEL_VXR_HIGH.
 *
 *     If you adhere to these guidelines, an interrupted process context
 *     does not need to save and restore vector registers because the
 *     register ranges are disjoint.
 *
 *     Also note that the __kernel_fpu_begin()/__kernel_fpu_end() functions
 *     include logic to save and restore up to 16 vector registers at once.
 *
 *  3. You can nest kernel_fpu_begin()/kernel_fpu_end() by using different
 *     struct kernel_fpu states.  Vector registers that are in use by outer
 *     levels are saved and restored.  You can minimize the save and restore
 *     effort by choosing disjoint vector register ranges.
 *
 *  4. To use vector floating-point instructions, specify the KERNEL_FPC
 *     flag to save and restore floating-point controls in addition to any
 *     vector register range.
 *
 *  5. To use floating-point registers and instructions only, specify the
 *     KERNEL_FPR flag.  This flag triggers a save and restore of vector
 *     registers V0 to V15 and floating-point controls.
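 *
 * Example (an illustrative sketch, not lifted from a particular caller;
 * the DECLARE_KERNEL_FPU_ONSTACK*() helpers come from asm/fpu-types.h):
 *
 *	DECLARE_KERNEL_FPU_ONSTACK16(vxstate);
 *
 *	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);
 *	... code using vector registers V0 to V15 ...
 *	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);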
 *
 * Copyright IBM Corp. 2015
 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
 */

#ifndef _ASM_S390_FPU_H
#define _ASM_S390_FPU_H

#include <linux/processor.h>
#include <linux/preempt.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <asm/sigcontext.h>
#include <asm/fpu-types.h>
#include <asm/fpu-insn.h>
#include <asm/facility.h>

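/* Facility bit 129 indicates that the vector facility is installed. */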
static inline bool cpu_has_vx(void)
{
	return likely(test_facility(129));
}

enum {
	KERNEL_FPC_BIT = 0,
	KERNEL_VXR_V0V7_BIT,
	KERNEL_VXR_V8V15_BIT,
	KERNEL_VXR_V16V23_BIT,
	KERNEL_VXR_V24V31_BIT,
};

#define KERNEL_FPC		BIT(KERNEL_FPC_BIT)
#define KERNEL_VXR_V0V7		BIT(KERNEL_VXR_V0V7_BIT)
#define KERNEL_VXR_V8V15	BIT(KERNEL_VXR_V8V15_BIT)
#define KERNEL_VXR_V16V23	BIT(KERNEL_VXR_V16V23_BIT)
#define KERNEL_VXR_V24V31	BIT(KERNEL_VXR_V24V31_BIT)

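/*
 * Composite ranges. KERNEL_VXR_LOW and KERNEL_VXR_HIGH match the usage
 * guidelines above; KERNEL_FPR selects V0-V15, whose leftmost doublewords
 * overlay the 16 floating-point registers, plus the floating-point controls.
 */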
#define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7   | KERNEL_VXR_V8V15)
#define KERNEL_VXR_MID		(KERNEL_VXR_V8V15  | KERNEL_VXR_V16V23)
#define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23 | KERNEL_VXR_V24V31)

#define KERNEL_VXR		(KERNEL_VXR_LOW	   | KERNEL_VXR_HIGH)
#define KERNEL_FPR		(KERNEL_FPC	   | KERNEL_VXR_LOW)

void load_fpu_state(struct fpu *state, int flags);
void save_fpu_state(struct fpu *state, int flags);
void __kernel_fpu_begin(struct kernel_fpu *state, int flags);
void __kernel_fpu_end(struct kernel_fpu *state, int flags);

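/* Save/load all 32 vector registers in two blocks of 16. */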
static __always_inline void save_vx_regs(__vector128 *vxrs)
{
	fpu_vstm(0, 15, &vxrs[0]);
	fpu_vstm(16, 31, &vxrs[16]);
}

static __always_inline void load_vx_regs(__vector128 *vxrs)
{
	fpu_vlm(0, 15, &vxrs[0]);
	fpu_vlm(16, 31, &vxrs[16]);
}

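/*
 * Store/load floating-point registers 0-15. The offset is the array stride
 * in freg_t units: 1 for a packed freg_t array, or 2 (one __vector128 per
 * pair of freg_t slots) to hit the leftmost doubleword of each vector
 * register, which is where FPR n overlays VR n.
 */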
static __always_inline void __save_fp_regs(freg_t *fprs, unsigned int offset)
{
	fpu_std(0, &fprs[0 * offset]);
	fpu_std(1, &fprs[1 * offset]);
	fpu_std(2, &fprs[2 * offset]);
	fpu_std(3, &fprs[3 * offset]);
	fpu_std(4, &fprs[4 * offset]);
	fpu_std(5, &fprs[5 * offset]);
	fpu_std(6, &fprs[6 * offset]);
	fpu_std(7, &fprs[7 * offset]);
	fpu_std(8, &fprs[8 * offset]);
	fpu_std(9, &fprs[9 * offset]);
	fpu_std(10, &fprs[10 * offset]);
	fpu_std(11, &fprs[11 * offset]);
	fpu_std(12, &fprs[12 * offset]);
	fpu_std(13, &fprs[13 * offset]);
	fpu_std(14, &fprs[14 * offset]);
	fpu_std(15, &fprs[15 * offset]);
}

static __always_inline void __load_fp_regs(freg_t *fprs, unsigned int offset)
{
	fpu_ld(0, &fprs[0 * offset]);
	fpu_ld(1, &fprs[1 * offset]);
	fpu_ld(2, &fprs[2 * offset]);
	fpu_ld(3, &fprs[3 * offset]);
	fpu_ld(4, &fprs[4 * offset]);
	fpu_ld(5, &fprs[5 * offset]);
	fpu_ld(6, &fprs[6 * offset]);
	fpu_ld(7, &fprs[7 * offset]);
	fpu_ld(8, &fprs[8 * offset]);
	fpu_ld(9, &fprs[9 * offset]);
	fpu_ld(10, &fprs[10 * offset]);
	fpu_ld(11, &fprs[11 * offset]);
	fpu_ld(12, &fprs[12 * offset]);
	fpu_ld(13, &fprs[13 * offset]);
	fpu_ld(14, &fprs[14 * offset]);
	fpu_ld(15, &fprs[15 * offset]);
}

static __always_inline void save_fp_regs(freg_t *fprs)
{
	__save_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
}

static __always_inline void load_fp_regs(freg_t *fprs)
{
	__load_fp_regs(fprs, sizeof(freg_t) / sizeof(freg_t));
}

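/* FPRs 0-15 overlay the leftmost doubleword of vector registers V0-V15. */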
static __always_inline void save_fp_regs_vx(__vector128 *vxrs)
{
	freg_t *fprs = (freg_t *)&vxrs[0].high;

	__save_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
}

static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
{
	freg_t *fprs = (freg_t *)&vxrs[0].high;

	__load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
}

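/*
 * ufpu_flags tracks which parts of the user space FPU state are currently
 * saved in thread->ufpu rather than live in registers. Reload whatever has
 * been saved and mark the register contents as valid again.
 */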
static inline void load_user_fpu_regs(void)
{
	struct thread_struct *thread = &current->thread;

	if (!thread->ufpu_flags)
		return;
	load_fpu_state(&thread->ufpu, thread->ufpu_flags);
	thread->ufpu_flags = 0;
}

static __always_inline void __save_user_fpu_regs(struct thread_struct *thread, int flags)
{
	save_fpu_state(&thread->ufpu, flags);
	__atomic_or(flags, &thread->ufpu_flags);
}

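/*
 * Save all user space FPU state that is not saved yet. kfpu_flags is
 * temporarily marked "all in use" so that a kernel FPU section interrupting
 * this sequence saves and restores any registers it touches; the previous
 * mask is restored once the user state is safely stored.
 */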
static inline void save_user_fpu_regs(void)
{
	struct thread_struct *thread = &current->thread;
	int mask, flags;

	mask = __atomic_or(KERNEL_FPC | KERNEL_VXR, &thread->kfpu_flags);
	flags = ~READ_ONCE(thread->ufpu_flags) & (KERNEL_FPC | KERNEL_VXR);
	if (flags)
		__save_user_fpu_regs(thread, flags);
	barrier();
	WRITE_ONCE(thread->kfpu_flags, mask);
}

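/*
 * Enter a kernel FPU section: save the user space contents of the requested
 * register ranges if they are not saved yet, and, if an outer kernel FPU
 * level already uses overlapping ranges, save those register contents into
 * the caller provided state as well.
 */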
static __always_inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
{
	struct thread_struct *thread = &current->thread;
	int mask, uflags;

	mask = __atomic_or(flags, &thread->kfpu_flags);
	state->hdr.mask = mask;
	uflags = READ_ONCE(thread->ufpu_flags);
	if ((uflags & flags) != flags)
		__save_user_fpu_regs(thread, ~uflags & flags);
	if (mask & flags)
		__kernel_fpu_begin(state, flags);
}

static __always_inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
{
	int mask = state->hdr.mask;

	if (mask & flags)
		__kernel_fpu_end(state, flags);
	barrier();
	WRITE_ONCE(current->thread.kfpu_flags, mask);
}

void __kernel_fpu_invalid_size(void);

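/*
 * The number of vector registers selected by flags must match the size of
 * the caller's on-stack save area exactly; a mismatch calls
 * __kernel_fpu_invalid_size(), which has no definition, so the error is
 * caught at build time.
 */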
static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
{
	unsigned int cnt = 0;

	if (flags & KERNEL_VXR_V0V7)
		cnt += 8;
	if (flags & KERNEL_VXR_V8V15)
		cnt += 8;
	if (flags & KERNEL_VXR_V16V23)
		cnt += 8;
	if (flags & KERNEL_VXR_V24V31)
		cnt += 8;
	if (cnt != size)
		__kernel_fpu_invalid_size();
}

#define kernel_fpu_begin(state, flags)					\
{									\
	typeof(state) s = (state);					\
	int _flags = (flags);						\
									\
	kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));		\
	_kernel_fpu_begin((struct kernel_fpu *)s, _flags);		\
}

#define kernel_fpu_end(state, flags)					\
{									\
	typeof(state) s = (state);					\
	int _flags = (flags);						\
									\
	kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));		\
	_kernel_fpu_end((struct kernel_fpu *)s, _flags);		\
}
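
/*
 * Example (an illustrative sketch of nesting with disjoint ranges; the
 * DECLARE_KERNEL_FPU_ONSTACK*() helpers come from asm/fpu-types.h):
 *
 *	DECLARE_KERNEL_FPU_ONSTACK8(vxstate);
 *
 *	kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
 *	... vector code using V16 to V23; cheap even when it interrupts a
 *	... KERNEL_VXR_LOW user, since the ranges do not overlap ...
 *	kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
 */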

static inline void save_kernel_fpu_regs(struct thread_struct *thread)
{
	if (!thread->kfpu_flags)
		return;
	save_fpu_state(&thread->kfpu, thread->kfpu_flags);
}

static inline void restore_kernel_fpu_regs(struct thread_struct *thread)
{
	if (!thread->kfpu_flags)
		return;
	load_fpu_state(&thread->kfpu, thread->kfpu_flags);
}

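/*
 * Copy the 16 floating-point register values between a packed freg_t array
 * and the leftmost doublewords of vector registers V0-V15.
 */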
static inline void convert_vx_to_fp(freg_t *fprs, __vector128 *vxrs)
{
	int i;

	for (i = 0; i < __NUM_FPRS; i++)
		fprs[i].ui = vxrs[i].high;
}

static inline void convert_fp_to_vx(__vector128 *vxrs, freg_t *fprs)
{
	int i;

	for (i = 0; i < __NUM_FPRS; i++)
		vxrs[i].high = fprs[i].ui;
}

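/*
 * Convert between struct fpu and the _s390_fp_regs layout that is exposed
 * to user space in signal contexts.
 */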
static inline void fpregs_store(_s390_fp_regs *fpregs, struct fpu *fpu)
{
	fpregs->pad = 0;
	fpregs->fpc = fpu->fpc;
	convert_vx_to_fp((freg_t *)&fpregs->fprs, fpu->vxrs);
}

static inline void fpregs_load(_s390_fp_regs *fpregs, struct fpu *fpu)
{
	fpu->fpc = fpregs->fpc;
	convert_fp_to_vx(fpu->vxrs, (freg_t *)&fpregs->fprs);
}

#endif /* _ASM_S390_FPU_H */