/*	$NetBSD: mp_subr.S,v 1.10 2016/05/16 20:03:07 palle Exp $	*/

/*
 * Copyright (c) 2006-2010 Matthew R. Green
 * Copyright (c) 1996-2002 Eduardo Horvath
 * Copyright (c) 1996 Paul Kranenburg
 * Copyright (c) 1996
 * 	The President and Fellows of Harvard College.
 *	All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.
 *	All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *	This product includes software developed by Harvard University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by Paul Kranenburg.
 * 4. Neither the name of the University nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
 */

#include "opt_ddb.h"
#include "opt_kgdb.h"
#include "opt_multiprocessor.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_netbsd32.h"
#include "opt_lockdebug.h"

#include "assym.h"
#include <machine/param.h>
#include <sparc64/sparc64/intreg.h>
#include <sparc64/sparc64/timerreg.h>
#include <machine/ctlreg.h>
#include <machine/psl.h>
#include <machine/signal.h>
#include <machine/trap.h>
#include <machine/frame.h>
#include <machine/pmap.h>
#include <machine/intr.h>
#include <machine/asm.h>
#include <machine/locore.h>
#include <sys/syscall.h>
#ifdef SUN4V
#include <machine/hypervisor.h>
#endif
#include "ksyms.h"

	.register	%g2,#scratch
	.register	%g3,#scratch

#define BLOCK_SIZE SPARC64_BLOCK_SIZE
#define BLOCK_ALIGN SPARC64_BLOCK_ALIGN

#if defined(MULTIPROCESSOR)
/*
 * IPI handler that does nothing itself, but causes rescheduling on
 * return from the interrupt.
 * void sparc64_ipi_nop(void *);
 */
ENTRY(sparc64_ipi_nop)
	ba,a	ret_from_intr_vector
	 nop

/*
 * IPI handler to halt the CPU.  Just calls the C vector.
 * void sparc64_ipi_halt(void *);
 */
ENTRY(sparc64_ipi_halt)
	call	_C_LABEL(sparc64_ipi_halt_thiscpu)
	 clr	%g4
	sir

/*
 * IPI handler to pause the CPU.  We trap into the debugger if one
 * is configured, otherwise just return.  The trap point is a global
 * label so that paused CPUs can be recognized from the debugger.
 */
ENTRY(sparc64_ipi_pause)
#if defined(DDB)
	.global sparc64_ipi_pause_trap_point
sparc64_ipi_pause_trap_point:
	ta	1
	 nop
#endif
	ba,a	ret_from_intr_vector
	 nop

/*
 * Increment an IPI event counter, defined in machine/{cpu,intr}.h.
 * Clobbers both r1 and r2 as scratch.
 */
#define IPIEVC_INC(n,r1,r2)						\
	sethi	%hi(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n), r2;		\
	ldx	[r2 + %lo(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n)], r1;	\
	inc	r1;							\
	stx	r1, [r2 + %lo(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n)]

/*
 * void sparc64_ipi_flush_pte_us(void *);
 * void sparc64_ipi_flush_pte_usiii(void *);
 *
 * IPI handler to flush a single pte.  We enter here with %tl already 1
 * and PSTATE_IE already disabled, so there's no need to do it again.
 *
 * On entry:
 *	%g2 = vaddr_t va
 *	%g3 = int ctx
 */
ENTRY(sparc64_ipi_flush_pte_us)
	srlx	%g2, PG_SHIFT4U, %g2		! drop unused va bits
	mov	CTX_SECONDARY, %g5
	sllx	%g2, PG_SHIFT4U, %g2
	ldxa	[%g5] ASI_DMMU, %g6		! Save secondary context
	sethi	%hi(KERNBASE), %g7
	membar	#LoadStore
	stxa	%g3, [%g5] ASI_DMMU		! Insert context to demap
	membar	#Sync
	or	%g2, DEMAP_PAGE_SECONDARY, %g2	! Demap page from secondary context only
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! to both TLBs
#ifdef TLB_FLUSH_LOWVA
	srl	%g2, 0, %g2			! also demap the 32-bit (low va) alias
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! Do the demap
#endif
	flush	%g7
	stxa	%g6, [%g5] ASI_DMMU		! Restore secondary context
	membar	#Sync
	IPIEVC_INC(IPI_EVCNT_TLB_PTE,%g2,%g3)

	ba,a	ret_from_intr_vector
	 nop

ENTRY(sparc64_ipi_flush_pte_usiii)
	andn	%g2, 0xfff, %g2			! drop unused va bits
	mov	CTX_PRIMARY, %g5
	ldxa	[%g5] ASI_DMMU, %g6		! Save primary context
	sethi	%hi(KERNBASE), %g7
	membar	#LoadStore
	stxa	%g3, [%g5] ASI_DMMU		! Insert context to demap
	membar	#Sync
	or	%g2, DEMAP_PAGE_PRIMARY, %g2	! Demap page from primary context only
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! to both TLBs
#ifdef TLB_FLUSH_LOWVA
	srl	%g2, 0, %g2			! also demap the 32-bit (low va) alias
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! Do the demap
#endif
	membar	#Sync
	flush	%g7
	stxa	%g6, [%g5] ASI_DMMU		! Restore primary context
	membar	#Sync
	flush	%g7
	IPIEVC_INC(IPI_EVCNT_TLB_PTE,%g2,%g3)

	ba,a	ret_from_intr_vector
	 nop

#ifdef SUN4V
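/*
 * void sparc64_ipi_flush_pte_sun4v(void *);
 *
 * IPI handler to flush a single pte via the hypervisor.  Unlike the
 * sun4u variants above, this handler (as the code below shows) takes
 * its arguments in:
 *	%g3 = vaddr_t va
 *	%g5 = int ctx
 */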
ENTRY(sparc64_ipi_flush_pte_sun4v)
	mov	%o0, %g1		! save input
	mov	%o1, %g2
	mov	%o2, %g4
	mov	%g3, %o0		! vaddr
	mov	%g5, %o1		! ctx
	mov	MAP_DTLB|MAP_ITLB, %o2	! flags
	ta	ST_MMU_UNMAP_ADDR
	mov	%g1, %o0		! restore input
	mov	%g2, %o1
	mov	%g4, %o2

	retry				! return from the IPI trap
#endif

/*
 * Secondary CPU bootstrap code.
 */
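/*
 * The code between the "1:" and "6:" labels below is position
 * independent: it finds its own address with "rd %pc" and addresses
 * its data slots relative to "1:".  Before a secondary CPU is started,
 * the kernel copies this code somewhere convenient and patches the
 * slots (located via the mp_tramp_* offsets exported further down)
 * with the itlb/dtlb slot counts, the function to enter and the
 * cpu_args pointer; the TTE array to load into the TLBs follows
 * at "6:".
 */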
	.text
	.align 32
1:	rd	%pc, %l0		! pc-relative base address
	LDULNG	[%l0 + (3f-1b)], %l1	! Load itlb slot count
	LDULNG	[%l0 + (7f-1b)], %g2	! Load cpu_args address.
	add	%l0, (6f-1b), %l2	! tlb slots
	ld	[%g2 + CBA_CPUTYP], %g3 ! Load cputype
	clr	%l3			! slot counter
.Litlb_loop:
	cmp	%l3, %l1
	be	CCCR, .Litlb_done
	 nop
	ldx	[%l2 + TTE_VPN], %l4
	ldx	[%l2 + TTE_DATA], %l5
#ifdef SUN4V
	cmp	%g3, CPU_SUN4V
	bne,pt	%icc, .Litlb_4u
	 nop
	! sun4v
	mov	%l4, %o0			! vaddr
	clr	%o1				! reserved
	mov	%l5, %o2			! tte
	mov	MAP_DTLB|MAP_ITLB, %o3		! flags
	mov	FT_MMU_MAP_PERM_ADDR, %o5	! hv fast trap function
	ta	ST_FAST_TRAP
	cmp	%o0, 0
	be,pt	%icc, .Litlb_next
	 nop
	sir					! crash if mapping fails
.Litlb_4u:
#endif
	! sun4u
	wr	%g0, ASI_DMMU, %asi
	stxa	%l4, [%g0 + TLB_TAG_ACCESS] %asi
	stxa	%l5, [%g0] ASI_DMMU_DATA_IN
	wr	%g0, ASI_IMMU, %asi
	stxa	%l4, [%g0 + TLB_TAG_ACCESS] %asi
	stxa	%l5, [%g0] ASI_IMMU_DATA_IN
.Litlb_next:
	membar	#Sync
	flush	%l4
	add	%l2, PTE_SIZE, %l2
	add	%l3, 1, %l3
	ba	%xcc, .Litlb_loop
	 nop
.Litlb_done:
	! continue the same loop (with the same indices and pointers
	! etc.), but load a new upper limit and do not push the
	! entries into the itlb
	LDULNG	[%l0 + (4f-1b)], %l1	! Load dtlb slot count
.Ldtlb_loop:
	cmp	%l3, %l1
	be	CCCR, .Ldtlb_done
	 nop
	ldx	[%l2 + TTE_VPN], %l4
	ldx	[%l2 + TTE_DATA], %l5
#ifdef SUN4V
	cmp	%g3, CPU_SUN4V
	bne,pt	%icc, .Ldtlb_4u
	 nop
	! sun4v
	mov	%l4, %o0			! vaddr
	clr	%o1				! reserved
	mov	%l5, %o2			! tte
	mov	MAP_DTLB, %o3			! flags
	mov	FT_MMU_MAP_PERM_ADDR, %o5	! hv fast trap function
	ta	ST_FAST_TRAP
	cmp	%o0, 0
	be,pt	%icc, .Ldtlb_next
	 nop
	sir					! crash if mapping fails
.Ldtlb_4u:
#endif
	! sun4u
	wr	%g0, ASI_DMMU, %asi
	stxa	%l4, [%g0 + TLB_TAG_ACCESS] %asi
	stxa	%l5, [%g0] ASI_DMMU_DATA_IN
.Ldtlb_next:
	membar	#Sync
	flush	%l4
	add	%l2, PTE_SIZE, %l2
	add	%l3, 1, %l3
	ba	%xcc, .Ldtlb_loop
	 nop
.Ldtlb_done:
	LDULNG	[%l0 + (5f-1b)], %l1	! Load function
	jmpl	%l1, %g0		! and enter it
	 nop

	.align PTRSZ
4:	ULONG	0x0			! dtlb slot count
3:	ULONG	0x0			! itlb slot count
5:	ULONG	0x0			! function to enter
7:	ULONG	0x0			! cpu_args address
	_ALIGN
6:					! the TTE array is appended here

#define DATA(name) \
        .data ; \
        .align PTRSZ ; \
        .globl  name ; \
name:

DATA(mp_tramp_code)
	POINTER	1b
DATA(mp_tramp_code_len)
	ULONG	6b-1b
DATA(mp_tramp_dtlb_slots)
	ULONG	4b-1b
DATA(mp_tramp_itlb_slots)
	ULONG	3b-1b
DATA(mp_tramp_func)
	ULONG	5b-1b
DATA(mp_tramp_ci)
	ULONG	7b-1b
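
/*
 * A sketch of how a caller might consume the exports above when
 * preparing the trampoline for a secondary CPU.  This is illustrative
 * only: the variable names and the allocation step are assumptions,
 * not something defined in this file.
 *
 *	extern void	*mp_tramp_code;
 *	extern u_long	mp_tramp_code_len, mp_tramp_dtlb_slots,
 *			mp_tramp_itlb_slots, mp_tramp_func, mp_tramp_ci;
 *
 *	char *v = ... at least mp_tramp_code_len bytes, plus room
 *		      for the TTE array ...;
 *	memcpy(v, mp_tramp_code, mp_tramp_code_len);
 *	*(u_long *)(v + mp_tramp_itlb_slots) = itlb_slot_count;
 *	*(u_long *)(v + mp_tramp_dtlb_slots) = dtlb_slot_count;
 *	*(u_long *)(v + mp_tramp_func) = (u_long)entry_func;
 *	*(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args_ptr;
 *	... append the TTE array at (v + mp_tramp_code_len), flush
 *	    the caches, then start the secondary CPU at "v" ...
 */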

	.text
	.align 32


/*
 * IPI handler to store the current FPU state.
 * void sparc64_ipi_save_fpstate(void *);
 *
 * On entry:
 *	%g2 = lwp
 */
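/*
 * The FP registers are saved with 64-byte block stores (ASI_BLK_P),
 * which is why fs_regs must be BLOCK_ALIGN aligned (checked under
 * DIAGNOSTIC).  FPRS_DL/FPRS_DU indicate which halves of the register
 * file are dirty; clean halves are not written out.
 */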
ENTRY(sparc64_ipi_save_fpstate)
	sethi	%hi(FPLWP), %g1
	LDPTR	[%g1 + %lo(FPLWP)], %g3
	cmp	%g3, %g2
	bne,pn	CCCR, 7f		! skip if fplwp has changed

	 rdpr	%pstate, %g2		! enable FP before we begin
	rd	%fprs, %g5
	wr	%g0, FPRS_FEF, %fprs
	or	%g2, PSTATE_PEF, %g2
	wrpr	%g2, 0, %pstate

	LDPTR	[%g3 + L_FPSTATE], %g3
	stx	%fsr, [%g3 + FS_FSR]	! f->fs_fsr = getfsr();
	rd	%gsr, %g2		! Save %gsr
	st	%g2, [%g3 + FS_GSR]
#if FS_REGS > 0
	add	%g3, FS_REGS, %g3
#endif
#ifdef DIAGNOSTIC
	btst	BLOCK_ALIGN, %g3	! Needs to be re-executed
	bnz,pn	%icc, 6f		! Check alignment
#endif
	 st	%g0, [%g3 + FS_QSIZE - FS_REGS]	! f->fs_qsize = 0;
	btst	FPRS_DL|FPRS_DU, %g5	! Both FPU halves clean?
	bz,pt	%icc, 5f		! Then skip it

	 mov	CTX_PRIMARY, %g2
	ldxa	[%g2] ASI_DMMU, %g6
	membar	#LoadStore
	stxa	%g0, [%g2] ASI_DMMU	! Switch MMU to kernel primary context
	membar	#Sync

	btst	FPRS_DL, %g5		! Lower FPU clean?
	bz,a,pt	%icc, 1f		! Then skip it, but upper FPU not clean
	 add	%g3, 2*BLOCK_SIZE, %g3	! Skip lower half (two blocks)

	stda	%f0, [%g3] ASI_BLK_P	! f->fs_f0 = etc;
	inc	BLOCK_SIZE, %g3
	stda	%f16, [%g3] ASI_BLK_P

	btst	FPRS_DU, %g5		! Upper FPU clean?
	bz,pt	%icc, 2f		! Then skip it
	 inc	BLOCK_SIZE, %g3
1:
	stda	%f32, [%g3] ASI_BLK_P
	inc	BLOCK_SIZE, %g3
	stda	%f48, [%g3] ASI_BLK_P
2:
	membar	#Sync			! Finish stores so we can restore the context
	brz,pn	%g6, 5f			! Skip if context 0
	 nop
	stxa	%g6, [%g2] ASI_DMMU	! Restore primary context
	membar	#Sync
5:
	wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean
	STPTR	%g0, [%g1 + %lo(FPLWP)]	! fplwp = NULL
7:
	IPIEVC_INC(IPI_EVCNT_FPU_SYNCH,%g2,%g3)
	ba,a	ret_from_intr_vector
	 nop

#ifdef DIAGNOSTIC
	!!
	!! Damn thing is *NOT* aligned on a 64-byte boundary
	!!
6:
	wr	%g0, FPRS_FEF, %fprs
	! XXX -- we should panic instead of silently entering debugger
	ta	1
	 nop
	ba,a	ret_from_intr_vector
	 nop
#endif

/*
 * IPI handler to drop the current FPU state.
 * void sparc64_ipi_drop_fpstate(void *);
 *
 * On entry:
 *	%g2 = lwp
 */
ENTRY(sparc64_ipi_drop_fpstate)
	rdpr	%pstate, %g1
	wr	%g0, FPRS_FEF, %fprs
	or	%g1, PSTATE_PEF, %g1
	wrpr	%g1, 0, %pstate
	set	FPLWP, %g1
	CASPTRA	[%g1] ASI_N, %g2, %g0	! fplwp = NULL if fplwp == %g2
	membar	#Sync			! Should not be needed due to retry
	IPIEVC_INC(IPI_EVCNT_FPU_FLUSH,%g2,%g3)
	ba,a	ret_from_intr_vector
	 nop

/*
 * Flush data cache page.
 * void sparc64_ipi_dcache_flush_page_usiii(paddr_t pa, int line_size)
 * void sparc64_ipi_dcache_flush_page_us(paddr_t pa, int line_size)
 * void sparc64_ipi_dcache_flush_page_sun4v(paddr_t pa, int line_size)
 *
 * On entry:
 *	%g2 = pa
 *	%g3 = line_size
 */
ENTRY(sparc64_ipi_dcache_flush_page_usiii)
	set	NBPG, %g1
	add	%g2, %g1, %g1	! end address

1:
	stxa	%g0, [%g2] ASI_DCACHE_INVALIDATE	! invalidate line at this pa
	add	%g2, %g3, %g2
	cmp	%g2, %g1
	bl,pt	%xcc, 1b
	 nop

	sethi	%hi(KERNBASE), %g5
	flush	%g5
	membar	#Sync
	ba,a	ret_from_intr_vector
	 nop

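/*
 * On US-I/II the D$ is searched instead: walk all of it (2*NBPG
 * bytes, 32 bytes per line), compare each line's physical tag
 * (PA<40:13>, held in tag bits <29:2>) with the target page, and
 * invalidate the lines that match by overwriting their tag.
 */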
ENTRY(sparc64_ipi_dcache_flush_page_us)
	mov	-1, %g1		! Generate mask for tag: bits [29..2]
	srlx	%g2, 13-2, %g5	! Tag is PA bits <40:13> in bits <29:2>
	clr	%g4		! Starting cache line offset
	srl	%g1, 2, %g1	! Now we have bits <29:0> set
	set	(2*NBPG), %g7	! Bytes of D$ to scan
	ba,pt	%icc, 1f
	 andn	%g1, 3, %g1	! Now we have bits <29:2> set

	.align 8
1:
	ldxa	[%g4] ASI_DCACHE_TAG, %g6
	mov	%g4, %g2
	deccc	32, %g7		! Done with the whole cache?
	bl,pn	%icc, 2f
	 inc	32, %g4		! Next line

	xor	%g6, %g5, %g6
	andcc	%g6, %g1, %g0	! Compare tags
	bne,pt	%xcc, 1b
	 membar	#LoadStore

	stxa	%g0, [%g2] ASI_DCACHE_TAG	! Invalidate matching line
	ba,pt	%icc, 1b
	 membar	#StoreLoad
2:

	sethi	%hi(KERNBASE), %g5
	flush	%g5
	membar	#Sync
	ba,a	ret_from_intr_vector
	 nop

#ifdef SUN4V
ENTRY(sparc64_ipi_dcache_flush_page_sun4v)
	set	NBPG, %o1
	call	hv_mem_sync
	 mov	%g2, %o0
	cmp	%o0, 0
	be,pt	%icc, 1f
	 nop
	sir				! crash if hv-call fails
1:
	sethi	%hi(KERNBASE), %g5
	flush	%g5
	membar	#Sync
	ba,a	ret_from_intr_vector
	 nop
#endif	/* SUN4V */

#endif	/* MULTIPROCESSOR */
