/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 Ian Lepore <ian@freebsd.org>
 * Copyright (c) 2012 Mark Tinguely
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/proc.h>

#include <machine/armreg.h>
#include <machine/elf.h>
#include <machine/frame.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/undefined.h>
#include <machine/vfp.h>

/* function prototypes */
static int vfp_bounce(u_int, u_int, struct trapframe *, int);
static void vfp_restore(struct vfp_state *);

extern int vfp_exists;
static struct undefined_handler vfp10_uh, vfp11_uh;
/* If true, the VFP unit has 32 double registers; otherwise it has 16. */
static int is_d32;

static MALLOC_DEFINE(M_FPUKERN_CTX, "fpukern_ctx",
    "Kernel contexts for VFP state");

struct fpu_kern_ctx {
	struct vfp_state	*prev;
#define	FPU_KERN_CTX_DUMMY	0x01	/* avoided save for the kern thread */
#define	FPU_KERN_CTX_INUSE	0x02
	uint32_t	 flags;
	struct vfp_state	 state;
};
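
/*
 * While a kernel thread (or a thread that entered via fpu_kern_enter())
 * borrows the VFP, pcb_vfpsaved is pointed at ctx->state so that register
 * saves land in the kernel context rather than the user state; ctx->prev
 * remembers the previous pointer so fpu_kern_leave() can restore it.
 */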

/*
 * About .fpu directives in this file...
 *
 * We should only need .fpu vfpv3, but clang 3.5 has a quirk where setting
 * vfpv3 doesn't imply that vfp2 features are also available -- both have to be
 * explicitly set to get all the features of both.  This is probably a bug in
 * clang, so it may get fixed and require changes here some day.  Other changes
 * are probably coming in clang too, because there is email and there are open
 * PRs indicating they want to completely disable the ability to use .fpu and
 * similar directives in inline asm.  That would be catastrophic for us;
 * hopefully they will come to their senses.  There was also some discussion of
 * a new syntax such as .push fpu=vfpv3; ...; .pop fpu; that would be ideal for
 * us, better than what we have now, really.
 *
 * For gcc, each .fpu directive completely overrides the prior directive, unlike
 * with clang, but luckily with gcc specifying v3 implies all the v2 features as
 * well.
 */

/*
 * Note that fmxr() deliberately does not end in a semicolon; the call sites
 * supply it, which keeps the macro safe in unbraced if/else bodies.
 */
#define fmxr(reg, val) \
    __asm __volatile("	.fpu vfpv2\n .fpu vfpv3\n"			\
		     "	vmsr	" __STRING(reg) ", %0"   :: "r"(val))

#define fmrx(reg) \
({ u_int val = 0;\
    __asm __volatile(" .fpu vfpv2\n .fpu vfpv3\n"			\
		     "	vmrs	%0, " __STRING(reg) : "=r"(val));	\
    val; \
})
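
/*
 * For example, fmrx(fpexc) expands to inline asm executing "vmrs %0, fpexc"
 * and evaluates to the value read from the FPEXC register, while
 * fmxr(fpexc, val) executes "vmsr fpexc, val" to write it back.
 */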

static u_int
get_coprocessorACR(void)
{
	u_int val;
	__asm __volatile("mrc p15, 0, %0, c1, c0, 2" : "=r" (val) : : "cc");
	return (val);
}

static void
set_coprocessorACR(u_int val)
{
	__asm __volatile("mcr p15, 0, %0, c1, c0, 2\n\t"
	 : : "r" (val) : "cc");
	isb();
}
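
/*
 * The register accessed above is the ARMv7 CPACR (coprocessor access control
 * register, CP15 c1/c0/2).  It holds a two-bit access field per coprocessor;
 * the COPROC10 and COPROC11 masks used in vfp_init() grant full access to
 * cp10 and cp11, the coprocessor numbers through which VFP and NEON
 * instructions are encoded.
 */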

static void
vfp_enable(void)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);
	fmxr(fpexc, fpexc | VFPEXC_EN);
	isb();
}

static void
vfp_disable(void)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);
	fmxr(fpexc, fpexc & ~VFPEXC_EN);
	isb();
}

/* Called for each CPU. */
void
vfp_init(void)
{
	u_int fpsid, tmp;
	u_int coproc, vfp_arch;

	coproc = get_coprocessorACR();
	coproc |= COPROC10 | COPROC11;
	set_coprocessorACR(coproc);

	fpsid = fmrx(fpsid);		/* read the vfp system id */

	if (!(fpsid & VFPSID_HARDSOFT_IMP)) {
		vfp_exists = 1;
		is_d32 = 0;
		PCPU_SET(vfpsid, fpsid);	/* save the fpsid */
		elf_hwcap |= HWCAP_VFP;

		vfp_arch =
		    (fpsid & VFPSID_SUBVERSION2_MASK) >> VFPSID_SUBVERSION_OFF;

		if (vfp_arch >= VFP_ARCH3) {
			tmp = fmrx(mvfr0);
			PCPU_SET(vfpmvfr0, tmp);
			elf_hwcap |= HWCAP_VFPv3;

			if ((tmp & VMVFR0_RB_MASK) == 2) {
				elf_hwcap |= HWCAP_VFPD32;
				is_d32 = 1;
			} else
				elf_hwcap |= HWCAP_VFPv3D16;

			tmp = fmrx(mvfr1);
			PCPU_SET(vfpmvfr1, tmp);

			if (PCPU_GET(cpuid) == 0) {
				if ((tmp & VMVFR1_FZ_MASK) == 0x1) {
					/* Denormal arithmetic is supported. */
					initial_fpscr &= ~VFPSCR_FZ;
					thread0.td_pcb->pcb_vfpstate.fpscr =
					    initial_fpscr;
				}
			}

			if ((tmp & VMVFR1_LS_MASK) >> VMVFR1_LS_OFF == 1 &&
			    (tmp & VMVFR1_I_MASK) >> VMVFR1_I_OFF == 1 &&
			    (tmp & VMVFR1_SP_MASK) >> VMVFR1_SP_OFF == 1)
				elf_hwcap |= HWCAP_NEON;
			if ((tmp & VMVFR1_FMAC_MASK) >> VMVFR1_FMAC_OFF == 1)
				elf_hwcap |= HWCAP_VFPv4;
		}

		vfp_disable();

		/*
		 * Initialize the coprocessor 10 and 11 handlers.  These are
		 * called to restore the registers and enable the VFP hardware.
		 */
		if (vfp10_uh.uh_handler == NULL) {
			vfp10_uh.uh_handler = vfp_bounce;
			vfp11_uh.uh_handler = vfp_bounce;
			install_coproc_handler_static(10, &vfp10_uh);
			install_coproc_handler_static(11, &vfp11_uh);
		}
	}
}

SYSINIT(vfp, SI_SUB_CPU, SI_ORDER_ANY, vfp_init, NULL);
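
/*
 * The SYSINIT above runs vfp_init() on the boot processor early in boot;
 * per the comment on vfp_init(), the function is also expected to be
 * invoked on each secondary CPU as it starts up.
 */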

/*
 * Start the VFP unit, restore the VFP registers from the PCB and retry
 * the instruction.
 */
static int
vfp_bounce(u_int addr, u_int insn, struct trapframe *frame, int code)
{
	u_int cpu, fpexc;
	struct pcb *curpcb;
	ksiginfo_t ksi;

	critical_enter();

	/*
	 * If the VFP is already on and we got an undefined instruction, then
	 * something tried to execute a truly invalid instruction that maps to
	 * the VFP.
	 */
	fpexc = fmrx(fpexc);
	if (fpexc & VFPEXC_EN) {
		/* Clear any exceptions */
		fmxr(fpexc, fpexc & ~(VFPEXC_EX | VFPEXC_FP2V));

		/* Kill the process; we do not handle emulation. */
		critical_exit();

		if (fpexc & VFPEXC_EX) {
			/* We have an exception, signal a SIGFPE */
			ksiginfo_init_trap(&ksi);
			ksi.ksi_signo = SIGFPE;
			if (fpexc & VFPEXC_UFC)
				ksi.ksi_code = FPE_FLTUND;
			else if (fpexc & VFPEXC_OFC)
				ksi.ksi_code = FPE_FLTOVF;
			else if (fpexc & VFPEXC_IOC)
				ksi.ksi_code = FPE_FLTINV;
			ksi.ksi_addr = (void *)addr;
			trapsignal(curthread, &ksi);
			return (0);
		}

		return (1);
	}

	curpcb = curthread->td_pcb;
	if ((code & FAULT_USER) == 0 &&
	    (curpcb->pcb_fpflags & PCB_FP_KERN) == 0) {
		critical_exit();
		return (1);
	}

	/*
	 * If the last time this thread used the VFP it was on this core, and
	 * the last thread to use the VFP on this core was this thread, then
	 * the VFP state is valid; otherwise, restore this thread's state to
	 * the VFP.
	 */
	fmxr(fpexc, fpexc | VFPEXC_EN);
	cpu = PCPU_GET(cpuid);
	if (curpcb->pcb_vfpcpu != cpu || curthread != PCPU_GET(fpcurthread)) {
		vfp_restore(curpcb->pcb_vfpsaved);
		curpcb->pcb_vfpcpu = cpu;
		PCPU_SET(fpcurthread, curthread);
	}

	critical_exit();

	KASSERT((code & FAULT_USER) == 0 ||
	    curpcb->pcb_vfpsaved == &curpcb->pcb_vfpstate,
	    ("Kernel VFP state in use when entering userspace"));

	return (0);
}
273
274/*
275 * Update the VFP state for a forked process or new thread. The PCB will
276 * have been copied from the old thread.
277 * The code is heavily based on arm64 logic.
278 */
279void
280vfp_new_thread(struct thread *newtd, struct thread *oldtd, bool fork)
281{
282	struct pcb *newpcb;
283
284	newpcb = newtd->td_pcb;
285
286	/* Kernel threads start with clean VFP */
287	if ((oldtd->td_pflags & TDP_KTHREAD) != 0) {
288		newpcb->pcb_fpflags &=
289		    ~(PCB_FP_STARTED | PCB_FP_KERN | PCB_FP_NOSAVE);
290	} else {
291		MPASS((newpcb->pcb_fpflags & (PCB_FP_KERN|PCB_FP_NOSAVE)) == 0);
292		if (!fork) {
293			newpcb->pcb_fpflags &= ~PCB_FP_STARTED;
294		}
295	}
296
297	newpcb->pcb_vfpsaved = &newpcb->pcb_vfpstate;
298	newpcb->pcb_vfpcpu = UINT_MAX;
299}

/*
 * Restore the given state to the VFP hardware.
 */
static void
vfp_restore(struct vfp_state *vfpsave)
{
	uint32_t fpexc;

	/* On vfpv3 we may need to restore FPINST and FPINST2 */
	fpexc = vfpsave->fpexec;
	if (fpexc & VFPEXC_EX) {
		fmxr(fpinst, vfpsave->fpinst);
		if (fpexc & VFPEXC_FP2V)
			fmxr(fpinst2, vfpsave->fpinst2);
	}
	fmxr(fpscr, vfpsave->fpscr);

	__asm __volatile(
	    " .fpu	vfpv2\n"
	    " .fpu	vfpv3\n"
	    " vldmia	%0!, {d0-d15}\n"	/* restore d0-d15 */
	    " cmp	%1, #0\n"		/* -D16 or -D32 unit? */
	    " vldmiane	%0!, {d16-d31}\n"	/* D32: restore d16-d31 too */
	    " addeq	%0, %0, #128\n"		/* D16: skip 16 regs * 8 bytes */
	    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
	    );

	fmxr(fpexc, fpexc);
}
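
/*
 * Both vfp_restore() above and vfp_store() below rely on the 32 double
 * registers being laid out first in struct vfp_state, so the post-indexed
 * vldmia/vstmia walk straight through the register array; the addeq advances
 * the pointer past the unused d16-d31 slots on D16 units so it ends at the
 * same offset either way.
 */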

/*
 * If the VFP is on, save its current state and turn it off if requested to do
 * so.  If the VFP is not on, the values at *vfpsave are left unchanged.  The
 * caller is responsible for preventing a context switch while this is running.
 */
void
vfp_store(struct vfp_state *vfpsave, boolean_t disable_vfp)
{
	uint32_t fpexc;

	fpexc = fmrx(fpexc);		/* Is the vfp enabled? */
	if (fpexc & VFPEXC_EN) {
		vfpsave->fpexec = fpexc;
		vfpsave->fpscr = fmrx(fpscr);

		/* On vfpv3 we may need to save FPINST and FPINST2 */
		if (fpexc & VFPEXC_EX) {
			vfpsave->fpinst = fmrx(fpinst);
			if (fpexc & VFPEXC_FP2V)
				vfpsave->fpinst2 = fmrx(fpinst2);
			fpexc &= ~VFPEXC_EX;
		}

		__asm __volatile(
		    " .fpu	vfpv2\n"
		    " .fpu	vfpv3\n"
		    " vstmia	%0!, {d0-d15}\n"	/* save d0-d15 */
		    " cmp	%1, #0\n"		/* -D16 or -D32 unit? */
		    " vstmiane	%0!, {d16-d31}\n"	/* D32: save d16-d31 too */
		    " addeq	%0, %0, #128\n"		/* D16: skip 16 regs * 8 bytes */
		    : "+&r" (vfpsave) : "r" (is_d32) : "cc"
		    );

		if (disable_vfp)
			fmxr(fpexc, fpexc & ~VFPEXC_EN);
	}
}

/*
 * The current thread is dying.  If the state currently in the hardware belongs
 * to the current thread, set fpcurthread to NULL to indicate that the VFP
 * hardware state does not belong to any thread.  If the VFP is on, turn it off.
 */
void
vfp_discard(struct thread *td)
{
	u_int tmp;

	if (PCPU_GET(fpcurthread) == td)
		PCPU_SET(fpcurthread, NULL);

	tmp = fmrx(fpexc);
	if (tmp & VFPEXC_EN)
		fmxr(fpexc, tmp & ~VFPEXC_EN);
}

void
vfp_save_state(struct thread *td, struct pcb *pcb)
{
	uint32_t fpexc;

	KASSERT(pcb != NULL, ("NULL vfp pcb"));
	KASSERT(td == NULL || td->td_pcb == pcb, ("Invalid vfp pcb"));

	/*
	 * savectx() will be called on panic with dumppcb as an argument;
	 * dumppcb doesn't have pcb_vfpsaved set, so set it here to make sure
	 * the VFP registers are saved.
	 */
	if (pcb->pcb_vfpsaved == NULL)
		pcb->pcb_vfpsaved = &pcb->pcb_vfpstate;

	if (td == NULL)
		td = curthread;

	critical_enter();
	/*
	 * Only store the registers if the VFP is enabled,
	 * i.e. return if we are trapping on FP access.
	 */
	fpexc = fmrx(fpexc);
	if (fpexc & VFPEXC_EN) {
		KASSERT(PCPU_GET(fpcurthread) == td,
		    ("Storing an invalid VFP state"));

		vfp_store(pcb->pcb_vfpsaved, true);
	}
	critical_exit();
}

struct fpu_kern_ctx *
fpu_kern_alloc_ctx(u_int flags)
{
	return (malloc(sizeof(struct fpu_kern_ctx), M_FPUKERN_CTX,
	    ((flags & FPU_KERN_NOWAIT) ? M_NOWAIT : M_WAITOK) | M_ZERO));
}

void
fpu_kern_free_ctx(struct fpu_kern_ctx *ctx)
{
	KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) == 0, ("freeing in-use ctx"));

	free(ctx, M_FPUKERN_CTX);
}

void
fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags)
{
	struct pcb *pcb;

	pcb = td->td_pcb;
	KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL,
	    ("ctx is required when !FPU_KERN_NOCTX"));
	KASSERT(ctx == NULL || (ctx->flags & FPU_KERN_CTX_INUSE) == 0,
	    ("using inuse ctx"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_NOSAVE) == 0,
	    ("recursive fpu_kern_enter while in PCB_FP_NOSAVE state"));

	if ((flags & FPU_KERN_NOCTX) != 0) {
		critical_enter();
		if (curthread == PCPU_GET(fpcurthread)) {
			vfp_save_state(curthread, pcb);
		}
		PCPU_SET(fpcurthread, NULL);

		vfp_enable();
		pcb->pcb_fpflags |= PCB_FP_KERN | PCB_FP_NOSAVE |
		    PCB_FP_STARTED;
		return;
	}

	if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) {
		ctx->flags = FPU_KERN_CTX_DUMMY | FPU_KERN_CTX_INUSE;
		return;
	}
	/*
	 * Check that either we are already using the VFP in the kernel, or
	 * that the saved state points to the default user-space state.
	 */
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0 ||
	    pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
	    ("Mangled pcb_vfpsaved %x %p %p", pcb->pcb_fpflags, pcb->pcb_vfpsaved,
	     &pcb->pcb_vfpstate));
	ctx->flags = FPU_KERN_CTX_INUSE;
	vfp_save_state(curthread, pcb);
	ctx->prev = pcb->pcb_vfpsaved;
	pcb->pcb_vfpsaved = &ctx->state;
	pcb->pcb_fpflags |= PCB_FP_KERN;
	pcb->pcb_fpflags &= ~PCB_FP_STARTED;
}

int
fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx)
{
	struct pcb *pcb;

	pcb = td->td_pcb;

	if ((pcb->pcb_fpflags & PCB_FP_NOSAVE) != 0) {
		KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX"));
		KASSERT(PCPU_GET(fpcurthread) == NULL,
		    ("non-NULL fpcurthread for PCB_FP_NOSAVE"));
		CRITICAL_ASSERT(td);

		vfp_disable();
		pcb->pcb_fpflags &= ~(PCB_FP_NOSAVE | PCB_FP_STARTED);
		critical_exit();
	} else {
		KASSERT((ctx->flags & FPU_KERN_CTX_INUSE) != 0,
		    ("FPU context not inuse"));
		ctx->flags &= ~FPU_KERN_CTX_INUSE;

		if (is_fpu_kern_thread(0) &&
		    (ctx->flags & FPU_KERN_CTX_DUMMY) != 0)
			return (0);
		KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx"));
		critical_enter();
		vfp_discard(td);
		critical_exit();
		pcb->pcb_fpflags &= ~PCB_FP_STARTED;
		pcb->pcb_vfpsaved = ctx->prev;
	}

	if (pcb->pcb_vfpsaved == &pcb->pcb_vfpstate) {
		pcb->pcb_fpflags &= ~PCB_FP_KERN;
	} else {
		KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) != 0,
		    ("unpaired fpu_kern_leave"));
	}

	return (0);
}
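
/*
 * A minimal usage sketch for the fpu_kern interface above.  This is
 * illustrative only, not taken from a real consumer; the zero flag values
 * simply mean "no special flags".
 *
 * With a heap-allocated context (sleepable path):
 *
 *	struct fpu_kern_ctx *ctx;
 *
 *	ctx = fpu_kern_alloc_ctx(0);
 *	fpu_kern_enter(curthread, ctx, 0);
 *	... execute VFP/NEON instructions in kernel mode ...
 *	fpu_kern_leave(curthread, ctx);
 *	fpu_kern_free_ctx(ctx);
 *
 * Or, without a context, inside the critical section that
 * fpu_kern_enter() opens and fpu_kern_leave() closes (no sleeping):
 *
 *	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
 *	... short VFP/NEON sequence ...
 *	fpu_kern_leave(curthread, NULL);
 */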

int
fpu_kern_thread(u_int flags __unused)
{
	struct pcb *pcb = curthread->td_pcb;

	KASSERT((curthread->td_pflags & TDP_KTHREAD) != 0,
	    ("Only kthread may use fpu_kern_thread"));
	KASSERT(pcb->pcb_vfpsaved == &pcb->pcb_vfpstate,
	    ("Mangled pcb_vfpsaved"));
	KASSERT((pcb->pcb_fpflags & PCB_FP_KERN) == 0,
	    ("Thread already set up for the VFP"));
	pcb->pcb_fpflags |= PCB_FP_KERN;
	return (0);
}

int
is_fpu_kern_thread(u_int flags __unused)
{
	struct pcb *curpcb;

	if ((curthread->td_pflags & TDP_KTHREAD) == 0)
		return (0);
	curpcb = curthread->td_pcb;
	return ((curpcb->pcb_fpflags & PCB_FP_KERN) != 0);
}