1/*-
2 * Copyright (c) 2015-2016 The FreeBSD Foundation
3 *
4 * This software was developed by Andrew Turner under
5 * sponsorship from the FreeBSD Foundation.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30#ifdef VFP
31#include <sys/param.h>
32#include <sys/systm.h>
33#include <sys/limits.h>
34#include <sys/kernel.h>
35#include <sys/malloc.h>
36#include <sys/pcpu.h>
37#include <sys/proc.h>
38
39#include <vm/uma.h>
40
41#include <machine/armreg.h>
42#include <machine/md_var.h>
43#include <machine/pcb.h>
44#include <machine/vfp.h>
45
/*
 * Sanity check we can store all the VFP registers:
 * 32 Q registers of 16 bytes each.
 */
CTASSERT(sizeof(((struct pcb *)0)->pcb_fpustate.vfp_regs) == 16 * 32);

/* malloc(9) tag for the fpu_kern_ctx allocations below */
static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");

/*
 * Context handed out by fpu_kern_alloc_ctx() and used by
 * fpu_kern_enter()/fpu_kern_leave() so kernel code can use the VFP unit
 * without clobbering the interrupted thread's saved FP state.
 */
struct fpu_kern_ctx {
	struct vfpstate	*prev;	/* pcb_fpusaved to restore on leave */
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t	 flags;
	struct vfpstate	 state;	/* save area while the ctx is in use */
};

/* Zone backing fpu_save_area_alloc() and fpu_initialstate */
static uma_zone_t fpu_save_area_zone;
/* Pristine FP state captured at boot by vfp_init() */
static struct vfpstate *fpu_initialstate;
62
63void
64vfp_enable(void)
65{
66	uint32_t cpacr;
67
68	cpacr = READ_SPECIALREG(cpacr_el1);
69	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_NONE;
70	WRITE_SPECIALREG(cpacr_el1, cpacr);
71	isb();
72}
73
74void
75vfp_disable(void)
76{
77	uint32_t cpacr;
78
79	cpacr = READ_SPECIALREG(cpacr_el1);
80	cpacr = (cpacr & ~CPACR_FPEN_MASK) | CPACR_FPEN_TRAP_ALL1;
81	WRITE_SPECIALREG(cpacr_el1, cpacr);
82	isb();
83}
84
85/*
86 * Called when the thread is dying or when discarding the kernel VFP state.
87 * If the thread was the last to use the VFP unit mark it as unused to tell
88 * the kernel the fp state is unowned. Ensure the VFP unit is off so we get
89 * an exception on the next access.
90 */
void
vfp_discard(struct thread *td)
{

#ifdef INVARIANTS
	if (td != NULL)
		CRITICAL_ASSERT(td);
#endif
	/* Drop ownership if td's state is the one live in the VFP unit */
	if (PCPU_GET(fpcurthread) == td)
		PCPU_SET(fpcurthread, NULL);

	/* Trap on the next FP access */
	vfp_disable();
}
104
/*
 * Copy the live VFP register file - the 32 128-bit q registers plus the
 * fpcr and fpsr control/status registers - into *state.  The caller must
 * ensure the VFP unit is enabled (not trapping) before calling.
 */
void
vfp_store(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
	__asm __volatile(
	    ".arch_extension fp\n"
	    "mrs	%0, fpcr		\n"
	    "mrs	%1, fpsr		\n"
	    "stp	q0,  q1,  [%2, #16 *  0]\n"
	    "stp	q2,  q3,  [%2, #16 *  2]\n"
	    "stp	q4,  q5,  [%2, #16 *  4]\n"
	    "stp	q6,  q7,  [%2, #16 *  6]\n"
	    "stp	q8,  q9,  [%2, #16 *  8]\n"
	    "stp	q10, q11, [%2, #16 * 10]\n"
	    "stp	q12, q13, [%2, #16 * 12]\n"
	    "stp	q14, q15, [%2, #16 * 14]\n"
	    "stp	q16, q17, [%2, #16 * 16]\n"
	    "stp	q18, q19, [%2, #16 * 18]\n"
	    "stp	q20, q21, [%2, #16 * 20]\n"
	    "stp	q22, q23, [%2, #16 * 22]\n"
	    "stp	q24, q25, [%2, #16 * 24]\n"
	    "stp	q26, q27, [%2, #16 * 26]\n"
	    "stp	q28, q29, [%2, #16 * 28]\n"
	    "stp	q30, q31, [%2, #16 * 30]\n"
	    ".arch_extension nofp\n"
	    : "=&r"(fpcr), "=&r"(fpsr) : "r"(vfp_state));

	state->vfp_fpcr = fpcr;
	state->vfp_fpsr = fpsr;
}
138
/*
 * Load *state into the VFP register file: the 32 128-bit q registers
 * first, then the fpcr and fpsr control/status registers.  The caller
 * must ensure the VFP unit is enabled before calling.
 */
void
vfp_restore(struct vfpstate *state)
{
	__uint128_t *vfp_state;
	uint64_t fpcr, fpsr;

	vfp_state = state->vfp_regs;
	fpcr = state->vfp_fpcr;
	fpsr = state->vfp_fpsr;

	__asm __volatile(
	    ".arch_extension fp\n"
	    "ldp	q0,  q1,  [%2, #16 *  0]\n"
	    "ldp	q2,  q3,  [%2, #16 *  2]\n"
	    "ldp	q4,  q5,  [%2, #16 *  4]\n"
	    "ldp	q6,  q7,  [%2, #16 *  6]\n"
	    "ldp	q8,  q9,  [%2, #16 *  8]\n"
	    "ldp	q10, q11, [%2, #16 * 10]\n"
	    "ldp	q12, q13, [%2, #16 * 12]\n"
	    "ldp	q14, q15, [%2, #16 * 14]\n"
	    "ldp	q16, q17, [%2, #16 * 16]\n"
	    "ldp	q18, q19, [%2, #16 * 18]\n"
	    "ldp	q20, q21, [%2, #16 * 20]\n"
	    "ldp	q22, q23, [%2, #16 * 22]\n"
	    "ldp	q24, q25, [%2, #16 * 24]\n"
	    "ldp	q26, q27, [%2, #16 * 26]\n"
	    "ldp	q28, q29, [%2, #16 * 28]\n"
	    "ldp	q30, q31, [%2, #16 * 30]\n"
	    "msr	fpcr, %0		\n"
	    "msr	fpsr, %1		\n"
	    ".arch_extension nofp\n"
	    : : "r"(fpcr), "r"(fpsr), "r"(vfp_state));
}
172
/*
 * If the VFP unit is enabled, save its registers into pcb->pcb_fpusaved
 * and disable the unit so the next FP access traps.  A critical section
 * is held across the check-and-save so the thread cannot migrate between
 * reading cpacr_el1 and storing the registers.
 */
static void
vfp_save_state_common(struct thread *td, struct pcb *pcb)
{
	uint32_t cpacr;

	critical_enter();
	/*
	 * Only store the registers if the VFP is enabled,
	 * i.e. return if we are trapping on FP access.
	 */
	cpacr = READ_SPECIALREG(cpacr_el1);
	if ((cpacr & CPACR_FPEN_MASK) == CPACR_FPEN_TRAP_NONE) {
		KASSERT(PCPU_GET(fpcurthread) == td,
		    ("Storing an invalid VFP state"));

		vfp_store(pcb->pcb_fpusaved);
		/* Complete the stores before the unit is disabled */
		dsb(ish);
		vfp_disable();
	}
	critical_exit();
}
194
/*
 * Save the VFP state of the given thread.  The pcb is passed explicitly
 * so we can assert it belongs to td.
 */
void
vfp_save_state(struct thread *td, struct pcb *pcb)
{
	KASSERT(td != NULL, ("NULL vfp thread"));
	KASSERT(pcb != NULL, ("NULL vfp pcb"));
	KASSERT(td->td_pcb == pcb, ("Invalid vfp pcb"));

	vfp_save_state_common(td, pcb);
}
204
/*
 * Save the VFP state into a pcb that is not attached to a thread,
 * e.g. dumppcb from savectx() on panic.
 */
void
vfp_save_state_savectx(struct pcb *pcb)
{
	/*
	 * savectx() will be called on panic with dumppcb as an argument,
	 * dumppcb doesn't have pcb_fpusaved set, so set it to save
	 * the VFP registers.
	 */
	MPASS(pcb->pcb_fpusaved == NULL);
	pcb->pcb_fpusaved = &pcb->pcb_fpustate;

	vfp_save_state_common(curthread, pcb);
}
218
/* Save the VFP state of a thread that is being switched out. */
void
vfp_save_state_switch(struct thread *td)
{
	KASSERT(td != NULL, ("NULL vfp thread"));

	vfp_save_state_common(td, td->td_pcb);
}
226
227/*
228 * Update the VFP state for a forked process or new thread. The PCB will
229 * have been copied from the old thread.
230 */
void
vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
{
	struct pcb *newpcb;

	newpcb = newtd->td_pcb;

	/* Kernel threads start with clean VFP */
	if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
		newpcb->pcb_fpflags &=
		    ~(PCB_FP_STARTED | PCB_FP_KERN | PCB_FP_NOSAVE);
	} else {
		MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);
		if (!fork) {
			/* A new (non-forked) thread starts with the VFP unused */
			newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
		}
	}

	/* Save into the in-pcb area; state is not resident on any cpu yet */
	newpcb->pcb_fpusaved = &newpcb->pcb_fpustate;
	newpcb->pcb_vfpcpu = UINT_MAX;
}
252
253/*
254 * Reset the FP state to avoid leaking state from the parent process across
255 * execve() (and to ensure that we get a consistent floating point environment
256 * in every new process).
257 */
void
vfp_reset_state(struct thread *td, struct pcb *pcb)
{
	/* Discard the threads VFP state before resetting it */
	critical_enter();
	vfp_discard(td);
	critical_exit();

	/*
	 * Clear the thread state. The VFP is disabled and is not the current
	 * VFP thread so we won't change any of these on context switch.
	 */
	bzero(&pcb->pcb_fpustate.vfp_regs, sizeof(pcb->pcb_fpustate.vfp_regs));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("pcb_fpusaved should point to pcb_fpustate."));
	/* Default control state, clean status */
	pcb->pcb_fpustate.vfp_fpcr = VFPCR_INIT;
	pcb->pcb_fpustate.vfp_fpsr = 0;
	/* Not resident on any cpu */
	pcb->pcb_vfpcpu = UINT_MAX;
	pcb->pcb_fpflags = 0;
}
278
/*
 * Make the VFP unit usable by the current thread: enable it and reload
 * the thread's saved registers unless they are already live on this cpu.
 * NOTE(review): presumably invoked from the FP-disabled trap path - the
 * caller is outside this file.
 */
void
vfp_restore_state(void)
{
	struct pcb *curpcb;
	u_int cpu;

	critical_enter();

	cpu = PCPU_GET(cpuid);
	curpcb = curthread->td_pcb;
	curpcb->pcb_fpflags |= PCB_FP_STARTED;

	vfp_enable();

	/*
	 * If the previous thread on this cpu to use the VFP was not the
	 * current thread, or the current thread last used it on a different
	 * cpu we need to restore the old state.
	 */
	if (PCPU_GET(fpcurthread) != curthread || cpu != curpcb->pcb_vfpcpu) {
		vfp_restore(curthread->td_pcb->pcb_fpusaved);
		PCPU_SET(fpcurthread, curthread);
		curpcb->pcb_vfpcpu = cpu;
	}

	critical_exit();
}
306
307void
308vfp_init_secondary(void)
309{
310	uint64_t pfr;
311
312	/* Check if there is a vfp unit present */
313	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
314	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
315		return;
316
317	/* Disable to be enabled when it's used */
318	vfp_disable();
319}
320
/*
 * Boot-time initialisation of the VFP support: create the UMA zone used
 * for save areas, capture a pristine initial state in fpu_initialstate
 * and give thread0 the default FP control state.
 */
static void
vfp_init(const void *dummy __unused)
{
	uint64_t pfr;

	/* Check if there is a vfp unit present */
	pfr = READ_SPECIALREG(id_aa64pfr0_el1);
	if ((pfr & ID_AA64PFR0_FP_MASK) == ID_AA64PFR0_FP_NONE)
		return;

	/* uma_zcreate(9)'s align argument is a mask, hence the "- 1" */
	fpu_save_area_zone = uma_zcreate("VFP_save_area",
	    sizeof(struct vfpstate), NULL, NULL, NULL, NULL,
	    _Alignof(struct vfpstate) - 1, 0);
	fpu_initialstate = uma_zalloc(fpu_save_area_zone, M_WAITOK | M_ZERO);

	/* Ensure the VFP is enabled before accessing it in vfp_store */
	vfp_enable();
	vfp_store(fpu_initialstate);

	/* Disable to be enabled when it's used */
	vfp_disable();

	/* Zero the VFP registers but keep fpcr and fpsr */
	bzero(fpu_initialstate->vfp_regs, sizeof(fpu_initialstate->vfp_regs));

	thread0.td_pcb->pcb_fpusaved->vfp_fpcr = VFPCR_INIT;
}

SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);
350
351struct fpu_kern_ctx *
352fpu_kern_alloc_ctx(u_int flags)
353{
354	struct fpu_kern_ctx *res;
355	size_t sz;
356
357	sz = sizeof(struct fpu_kern_ctx);
358	res = malloc(sz, M_FPUKERN_CTX, ((flags & FPU_KERN_NOWAIT) ?
359	    M_NOWAIT : M_WAITOK) | M_ZERO);
360	return (res);
361}
362
/*
 * Free a context allocated by fpu_kern_alloc_ctx().  It must not be in
 * use, i.e. fpu_kern_leave() must have been called for it.
 */
void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{

	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("free'ing inuse ctx"));
	/* XXXAndrew clear the memory ? */
	free(ctx, M_FPUKERN_CTX);
}
371
/*
 * Begin using the VFP unit from kernel code in thread td.  With
 * FPU_KERN_NOCTX the current state is saved in place and a critical
 * section is held until fpu_kern_leave(); no ctx is needed.  Otherwise
 * the thread's saved-state pointer is redirected into ctx->state so the
 * kernel's FP use cannot clobber the previously saved state.
 */
void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		/* Save the current thread's state before we clobber it */
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	/* Designated fpu kernel threads skip the save entirely */
	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check either we are already using the VFP in the kernel, or
	 * the saved state points to the default user space.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_fpusaved, &pcb->pcb_fpustate));
	ctx->flags = FPU_KERN_CTX_INUSE;
	/* Save the current state, then redirect future saves into ctx */
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_fpusaved;
	pcb->pcb_fpusaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;

	return;
}
418
/*
 * Stop using the VFP unit from kernel code: undo fpu_kern_enter().
 * Restores the saved-state pointer (the registers themselves are
 * reloaded lazily on the next FP access) and, in the NOCTX case, exits
 * the critical section entered by fpu_kern_enter().  Always returns 0.
 */
int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		/* Drop the kernel's FP state; trap on the next access */
		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		/* A dummy ctx saved nothing, so there is nothing to undo */
		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_fpusaved = ctx->prev;
	}

	if (pcb->pcb_fpusaved == &pcb->pcb_fpustate) {
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		/* Still inside a nested fpu_kern_enter() */
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}
460
/*
 * Mark the current kernel thread as a VFP-using kernel thread, allowing
 * it to pass FPU_KERN_KTHR to fpu_kern_enter() (see is_fpu_kern_thread()).
 * Always returns 0.
 */
int
fpu_kern_thread(u_int flags __unused)
{
	struct pcb *pcb = curthread->td_pcb;

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(pcb->pcb_fpusaved == &pcb->pcb_fpustate,
	    ("Mangled pcb_fpusaved"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
	    ("Thread already setup for the VFP"));
	pcb->pcb_fpflags |= PCB_FP_KERN;
	return (0);
}
475
476int
477is_fpu_kern_thread(u_int flags __unused)
478{
479	struct pcb *curpcb;
480
481	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
482		return (0);
483	curpcb = curthread->td_pcb;
484	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
485}
486
487/*
488 * FPU save area alloc/free/init utility routines
489 */
490struct vfpstate *
491fpu_save_area_alloc(void)
492{
493	return (uma_zalloc(fpu_save_area_zone, M_WAITOK));
494}
495
/* Return a save area allocated by fpu_save_area_alloc() to the zone. */
void
fpu_save_area_free(struct vfpstate *fsa)
{
	uma_zfree(fpu_save_area_zone, fsa);
}
501
502void
503fpu_save_area_reset(struct vfpstate *fsa)
504{
505	memcpy(fsa, fpu_initialstate, sizeof(*fsa));
506}
507#endif
508