/*	$NetBSD: altivec.c,v 1.10 2005/12/11 12:18:43 christos Exp $	*/

/*
 * Copyright (C) 1996 Wolfgang Solfrank.
 * Copyright (C) 1996 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altivec.c,v 1.10 2005/12/11 12:18:43 christos Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/sa.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/malloc.h>
#include <sys/pool.h>

#include <uvm/uvm_extern.h>

#include <powerpc/altivec.h>
#include <powerpc/spr.h>
#include <powerpc/psl.h>

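/*
 * Claim the AltiVec unit for the current LWP: flush any other LWP's
 * vector state still live in this CPU, then load the current LWP's
 * state from its PCB and record the new ownership.  (On NetBSD this is
 * typically reached from the vector-unavailable trap, but in any case
 * the caller must be the LWP that is about to use the unit.)
 */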
void
enable_vec(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l = curlwp;
	struct pcb *pcb = &l->l_addr->u_pcb;
	struct trapframe *tf = trapframe(l);
	struct vreg *vr = &pcb->pcb_vr;
	register_t msr;

	KASSERT(pcb->pcb_veccpu == NULL);

	pcb->pcb_flags |= PCB_ALTIVEC;

	/*
	 * Enable AltiVec temporarily (and disable interrupts, so the
	 * ownership bookkeeping can't change underneath us).
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	if (ci->ci_veclwp) {
		save_vec_cpu();
	}
	KASSERT(curcpu()->ci_veclwp == NULL);

	/*
	 * Restore VSCR by first loading it into a vector register and then
	 * moving it into VSCR with mtvscr; VSCR has no direct memory form.
	 * (This needs to be done before loading the user's vector registers,
	 * since we need to use a scratch vector register.)
	 */
	__asm __volatile("vxor %2,%2,%2; lvewx %2,%0,%1; mtvscr %2" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * VRSAVE will be restored when the trap frame returns.
	 */
	tf->tf_xtra[TF_VRSAVE] = vr->vrsave;

#define	LVX(n,vr)	__asm /*__volatile*/("lvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));
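/*
 * The "n" constraint forces the register number to a compile-time
 * constant, so "%2" names vector register n directly; "b" keeps the
 * base address out of r0, which lvx/stvx would read as a literal 0.
 * (An asm with no outputs is implicitly volatile in GCC, so the
 * commented-out __volatile is redundant rather than a bug.)
 */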

	/*
	 * Load all 32 vector registers.
	 */
	LVX( 0,vr);	LVX( 1,vr);	LVX( 2,vr);	LVX( 3,vr);
	LVX( 4,vr);	LVX( 5,vr);	LVX( 6,vr);	LVX( 7,vr);
	LVX( 8,vr);	LVX( 9,vr);	LVX(10,vr);	LVX(11,vr);
	LVX(12,vr);	LVX(13,vr);	LVX(14,vr);	LVX(15,vr);

	LVX(16,vr);	LVX(17,vr);	LVX(18,vr);	LVX(19,vr);
	LVX(20,vr);	LVX(21,vr);	LVX(22,vr);	LVX(23,vr);
	LVX(24,vr);	LVX(25,vr);	LVX(26,vr);	LVX(27,vr);
	LVX(28,vr);	LVX(29,vr);	LVX(30,vr);	LVX(31,vr);
	__asm __volatile ("isync");

	/*
	 * Enable AltiVec when we return to user-mode.
	 * Record the new ownership of the AltiVec unit.
	 */
	curcpu()->ci_veclwp = l;
	pcb->pcb_veccpu = curcpu();
	pcb->pcb_flags |= PCB_OWNALTIVEC;
	__asm __volatile ("sync");

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}

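/*
 * Flush this CPU's live AltiVec state, if any, back into its owner's
 * PCB and give up ownership of the unit.
 */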
void
save_vec_cpu(void)
{
	struct cpu_info *ci = curcpu();
	struct lwp *l;
	struct pcb *pcb;
	struct vreg *vr;
	struct trapframe *tf;
	register_t msr;

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = mfmsr();
	mtmsr((msr & ~PSL_EE) | PSL_VEC);
	__asm __volatile ("isync");
	l = ci->ci_veclwp;
	if (l == NULL)
		goto out;
	pcb = &l->l_addr->u_pcb;
	vr = &pcb->pcb_vr;
	tf = trapframe(l);

#define	STVX(n,vr)	__asm /*__volatile*/("stvx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vreg[n])), "n"(n));
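/* Store counterpart of LVX above; the same constraint trick applies. */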

	/*
	 * Save the vector registers.
	 */
	STVX( 0,vr);	STVX( 1,vr);	STVX( 2,vr);	STVX( 3,vr);
	STVX( 4,vr);	STVX( 5,vr);	STVX( 6,vr);	STVX( 7,vr);
	STVX( 8,vr);	STVX( 9,vr);	STVX(10,vr);	STVX(11,vr);
	STVX(12,vr);	STVX(13,vr);	STVX(14,vr);	STVX(15,vr);

	STVX(16,vr);	STVX(17,vr);	STVX(18,vr);	STVX(19,vr);
	STVX(20,vr);	STVX(21,vr);	STVX(22,vr);	STVX(23,vr);
	STVX(24,vr);	STVX(25,vr);	STVX(26,vr);	STVX(27,vr);
	STVX(28,vr);	STVX(29,vr);	STVX(30,vr);	STVX(31,vr);

	/*
	 * Save VSCR (this needs to be done after saving the vector
	 * registers, since we need to use one of them as scratch).
	 */
	__asm __volatile("mfvscr %2; stvewx %2,%0,%1" \
	    ::	"b"(vr), "r"(offsetof(struct vreg, vscr)), "n"(0));

	/*
	 * Save VRSAVE.
	 */
	vr->vrsave = tf->tf_xtra[TF_VRSAVE];

	/*
	 * Note that this CPU no longer holds any LWP's vector state, and
	 * stop any active data streams before giving up the unit.
	 */
	pcb->pcb_veccpu = NULL;
	ci->ci_veclwp = NULL;
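	/*
	 * dssall stops every software-initiated data stream started with
	 * dst/dstst; the sync orders the register stores above before the
	 * release of ownership can be observed.
	 */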
	__asm __volatile ("dssall; sync");

 out:

	/*
	 * Restore MSR (turn off AltiVec).
	 */
	mtmsr(msr);
}

/*
 * Save an LWP's AltiVec state to its PCB.  The state may be in any CPU.
 * The LWP must either belong to curproc or be traced by curproc (and
 * stopped); the point is that the LWP must not run on another CPU while
 * this function executes.
 */
void
save_vec_lwp(struct lwp *l, int discard)
{
	struct pcb * const pcb = &l->l_addr->u_pcb;
	struct cpu_info * const ci = curcpu();

	/*
	 * If it's already in the PCB, there's nothing to do.
	 */
	if (pcb->pcb_veccpu == NULL)
		return;

	/*
	 * If we simply need to discard the information, then we don't
	 * need to save anything.
	 */
	if (discard) {
#ifndef MULTIPROCESSOR
		KASSERT(ci == pcb->pcb_veccpu);
#endif
		KASSERT(l == pcb->pcb_veccpu->ci_veclwp);
		pcb->pcb_veccpu->ci_veclwp = NULL;
		pcb->pcb_veccpu = NULL;
		pcb->pcb_flags &= ~PCB_OWNALTIVEC;
		return;
	}

	/*
	 * If the state is in the current CPU, just flush the current CPU's
	 * state.
	 */
	if (l == ci->ci_veclwp) {
		save_vec_cpu();
		return;
	}

#ifdef MULTIPROCESSOR
	/*
	 * It must be on another CPU; flush it from there.
	 */
	mp_save_vec_lwp(l);
#endif
}
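
/*
 * Illustrative (hypothetical) caller: code that wants a coherent copy
 * of the vector state in the PCB, such as a ptrace-style register
 * fetch, would do something like
 *
 *	save_vec_lwp(l, 0);
 *	memcpy(where, &l->l_addr->u_pcb.pcb_vr, sizeof(struct vreg));
 *
 * while a path that is about to destroy the state anyway (exec, for
 * instance) would pass discard = 1 and skip the save.
 */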

#define ZERO_VEC	19
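/*
 * The specific register number is presumably arbitrary: the register is
 * saved to the stack before use and restored afterwards, so any of the
 * 32 vector registers would do.
 */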

void
vzeropage(paddr_t pa)
{
	const paddr_t ea = pa + PAGE_SIZE;
	uint32_t vec[7], *vp = (void *) roundup((uintptr_t) vec, 16);
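	/*
	 * vec[] is 28 bytes, so rounding its start up to a 16-byte
	 * boundary always leaves the 16 aligned bytes that stvx/lvx
	 * need to park ZERO_VEC.
	 */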
	register_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC register we are going to use before we disable
	 * relocation.
	 */
	__asm("stvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));
	__asm("vxor %0,%0,%0" :: "n"(ZERO_VEC));

	/*
	 * Zero the page 64 bytes (four vector registers) at a time, with
	 * data translation disabled so the stores go straight to the
	 * physical address.
	 */
	__asm __volatile(
	    "   sync;"
	    "   mfmsr  %[msr];"
	    "   rlwinm %[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
	    "   mtmsr  %[msr];"			/* Turn off DMMU */
	    "   isync;"
	    "1: stvx   %[zv], %[pa], %[off0];"
	    "   stvxl  %[zv], %[pa], %[off16];"
	    "   stvx   %[zv], %[pa], %[off32];"
	    "   stvxl  %[zv], %[pa], %[off48];"
	    "   addi   %[pa], %[pa], 64;"
	    "   cmplw  %[pa], %[ea];"
	    "   blt+   1b;"
	    "   ori    %[msr], %[msr], 0x10;"	/* Set PSL_DR */
	    "   sync;"
	    "   mtmsr  %[msr];"			/* Turn on DMMU */
	    "   isync;"
	    :: [msr] "r"(msr), [pa] "b"(pa), [ea] "b"(ea),
	    [off0] "r"(0), [off16] "r"(16), [off32] "r"(32), [off48] "r"(48),
	    [zv] "n"(ZERO_VEC));

	/*
	 * Restore VEC register (now that we can access the stack again).
	 */
	__asm("lvx %1,0,%0" :: "r"(vp), "n"(ZERO_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}

#define LO_VEC	16
#define HI_VEC	17
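/*
 * As with ZERO_VEC above, the particular registers chosen are presumably
 * arbitrary; both are saved before use and restored afterwards.
 */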

void
vcopypage(paddr_t dst, paddr_t src)
{
	const paddr_t edst = dst + PAGE_SIZE;
	uint32_t vec[11], *vp = (void *) roundup((uintptr_t) vec, 16);
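	/*
	 * vec[] is 44 bytes: room for the two 16-byte register save slots
	 * after rounding the start up to a 16-byte boundary.
	 */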
	register_t omsr, msr;

	__asm __volatile("mfmsr %0" : "=r"(omsr) :);

	/*
	 * Turn on AltiVec, turn off interrupts.
	 */
	msr = (omsr & ~PSL_EE) | PSL_VEC;
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(msr));

	/*
	 * Save the VEC registers we will be using before we disable
	 * relocation.
	 */
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("stvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Copy the page 32 bytes at a time, with the DMMU disabled.  On
	 * most PPCs, two vector registers (32 bytes) occupy one cache
	 * line.
	 */
	__asm __volatile(
	    "   sync;"
	    "   mfmsr  %[msr];"
	    "   rlwinm %[msr],%[msr],0,28,26;"	/* Clear PSL_DR */
	    "   mtmsr  %[msr];"			/* Turn off DMMU */
	    "   isync;"
	    "1: lvx    %[lv], %[src], %[off0];"
	    "   stvx   %[lv], %[dst], %[off0];"
	    "   lvxl   %[hv], %[src], %[off16];"
	    "   stvxl  %[hv], %[dst], %[off16];"
	    "   addi   %[src], %[src], 32;"
	    "   addi   %[dst], %[dst], 32;"
	    "   cmplw  %[dst], %[edst];"
	    "   blt+   1b;"
	    "   ori    %[msr], %[msr], 0x10;"	/* Set PSL_DR */
	    "   sync;"
	    "   mtmsr  %[msr];"			/* Turn on DMMU */
	    "   isync;"
	    :: [msr] "r"(msr), [src] "b"(src), [dst] "b"(dst),
	    [edst] "b"(edst), [off0] "r"(0), [off16] "r"(16),
	    [lv] "n"(LO_VEC), [hv] "n"(HI_VEC));

	/*
	 * Restore VEC registers (now that we can access the stack again).
	 */
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"( 0), "n"(LO_VEC));
	__asm("lvx %2,%1,%0" :: "b"(vp), "r"(16), "n"(HI_VEC));

	/*
	 * Restore old MSR (AltiVec OFF).
	 */
	__asm __volatile("sync; mtmsr %0; isync" :: "r"(omsr));
}