mp_machdep.c revision 295128
/*-
 * Copyright (c) 2011 Semihalf.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm/arm/mp_machdep.c 295128 2016-02-01 19:36:33Z skra $");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/ktr.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>

#include <machine/acle-compat.h>
#include <machine/armreg.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/debug_monitor.h>
#include <machine/smp.h>
#include <machine/pcb.h>
#include <machine/pmap.h>
#include <machine/physmem.h>
#include <machine/intr.h>
#include <machine/vmparam.h>
#ifdef VFP
#include <machine/vfp.h>
#endif
#ifdef CPU_MV_PJ4B
#include <arm/mv/mvwin.h>
#include <dev/fdt/fdt_common.h>
#endif

#include "opt_smp.h"

extern struct pcpu __pcpu[];
/* Used to hold the APs until we are ready to release them. */
struct mtx ap_boot_mtx;
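/*
 * Per-CPU register context saved by savectx() when a CPU is stopped
 * via IPI_STOP.
 */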
struct pcb stoppcbs[MAXCPU];

/* Number of application processors (APs). */
volatile int mp_naps;

/* Set to 1 once we're ready to let the APs out of the pen. */
volatile int aps_ready = 0;

#ifndef ARM_INTRNG
static int ipi_handler(void *arg);
#endif
void set_stackptrs(int cpu);

/* Temporary variables for init_secondary(). */
void *dpcpu[MAXCPU - 1];

/* Determine whether we are running on an MP machine. */
int
cpu_mp_probe(void)
{
	CPU_SETOF(0, &all_cpus);

	return (platform_mp_probe());
}

/* Wait up to two seconds for all application processors to check in. */
static int
check_ap(void)
{
	uint32_t ms;

	for (ms = 0; ms < 2000; ++ms) {
		if ((mp_naps + 1) == mp_ncpus)
			return (0);		/* success */
		else
			DELAY(1000);
	}

	return (-2);
}

extern unsigned char _end[];

/* Initialize and fire up non-boot processors. */
void
cpu_mp_start(void)
{
	int error, i;

	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);

	/* Reserve per-CPU (dpcpu) data for the application processors. */
	for (i = 0; i < (mp_ncpus - 1); i++)
		dpcpu[i] = (void *)kmem_malloc(kernel_arena, DPCPU_SIZE,
		    M_WAITOK | M_ZERO);

	cpu_idcache_wbinv_all();
	cpu_l2cache_wbinv_all();
	cpu_idcache_wbinv_all();

	/* Initialize boot code and start up processors. */
	platform_mp_start_ap();

	/* Check whether the APs started properly. */
	error = check_ap();
	if (error)
		printf("WARNING: Some AP's failed to start\n");
	else
		for (i = 1; i < mp_ncpus; i++)
			CPU_SET(i, &all_cpus);
}

/* Introduce the rest of the cores to the world. */
void
cpu_mp_announce(void)
{

}

extern vm_paddr_t pmap_pa;
void
init_secondary(int cpu)
{
	struct pcpu *pc;
	uint32_t loop_counter;
#ifndef ARM_INTRNG
	int start = 0, end = 0;
#endif
	uint32_t actlr_mask, actlr_set;

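	/*
	 * Bring up this core's MMU state: program the memory attribute
	 * (TEX remap) registers, switch to the kernel page table with the
	 * CPU-specific ACTLR bits applied, then run the common CPU setup.
	 */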
	pmap_set_tex();
	cpuinfo_get_actlr_modifier(&actlr_mask, &actlr_set);
	reinit_mmu(pmap_kern_ttb, actlr_mask, actlr_set);
	cpu_setup();

	/* Provide stack pointers for other processor modes. */
	set_stackptrs(cpu);

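	/* Unmask asynchronous aborts; IRQs remain masked until later. */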
	enable_interrupts(PSR_A);
	pc = &__pcpu[cpu];

	/*
	 * pcpu_init() updates the pcpu queue, so it must not run in parallel
	 * on several cores.  Wait here until all lower-numbered APs have
	 * registered themselves in mp_naps.
	 */
	while (mp_naps < (cpu - 1))
		;

	pcpu_init(pc, cpu, sizeof(struct pcpu));
	dpcpu_init(dpcpu[cpu - 1], cpu);
	/* Signal our startup to BSP */
	atomic_add_rel_32(&mp_naps, 1);

	/* Spin until the BSP releases the APs */
	while (!atomic_load_acq_int(&aps_ready)) {
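		/* On ARMv7, sleep until release_aps() sets aps_ready and issues SEV. */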
#if __ARM_ARCH >= 7
		__asm __volatile("wfe");
#endif
	}

	/* Initialize curthread to this CPU's idle thread. */
	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
	pc->pc_curthread = pc->pc_idlethread;
	pc->pc_curpcb = pc->pc_idlethread->td_pcb;
	set_curthread(pc->pc_idlethread);
#ifdef VFP
	vfp_init();
#endif

	mtx_lock_spin(&ap_boot_mtx);

	atomic_add_rel_32(&smp_cpus, 1);

	if (smp_cpus == mp_ncpus) {
		/* Enable IPIs, TLB shootdowns, CPU freezes, etc. */
		atomic_store_rel_int(&smp_started, 1);
	}

	mtx_unlock_spin(&ap_boot_mtx);

#ifndef ARM_INTRNG
	/* Unmask the IPI interrupt range. */
#ifdef IPI_IRQ_START
	start = IPI_IRQ_START;
#ifdef IPI_IRQ_END
	end = IPI_IRQ_END;
#else
	end = IPI_IRQ_START;
#endif
#endif

	for (int i = start; i <= end; i++)
		arm_unmask_irq(i);
#endif /* !ARM_INTRNG */
	/* Unmask IRQs on this CPU. */
	enable_interrupts(PSR_I);

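	/* Wait until the last CPU to check in above sets smp_started. */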
	loop_counter = 0;
	while (smp_started == 0) {
		DELAY(100);
		loop_counter++;
		if (loop_counter == 1000)
			CTR0(KTR_SMP, "AP still wait for smp_started");
	}
	/* Start per-CPU event timers. */
	cpu_initclocks_ap();

	CTR0(KTR_SMP, "go into scheduler");
	platform_mp_init_secondary();

	/* Enter the scheduler */
	sched_throw(NULL);

	panic("scheduler returned us to %s", __func__);
	/* NOTREACHED */
}

#ifdef ARM_INTRNG
static void
ipi_rendezvous(void *dummy __unused)
{

	CTR0(KTR_SMP, "IPI_RENDEZVOUS");
	smp_rendezvous_action();
}

static void
ipi_ast(void *dummy __unused)
{

	CTR0(KTR_SMP, "IPI_AST");
}

static void
ipi_stop(void *dummy __unused)
{
	u_int cpu;

	/*
	 * IPI_STOP_HARD is mapped to IPI_STOP.
	 */
	CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD");

	cpu = PCPU_GET(cpuid);
	savectx(&stoppcbs[cpu]);

	/*
	 * CPUs are stopped when entering the debugger and at
	 * system shutdown, both of which can precede a
	 * panic dump.  For the dump to be correct, all caches
	 * must be flushed and invalidated, but on ARM there's
	 * no way to broadcast a wbinv_all to other cores.
	 * Instead, we have each core do the local wbinv_all as
	 * part of stopping the core.  The core requesting the
	 * stop will do the L2 cache flush after all other cores
	 * have done their L1 flushes and stopped.
	 */
	cpu_idcache_wbinv_all();

	/* Indicate we are stopped */
	CPU_SET_ATOMIC(cpu, &stopped_cpus);

	/* Wait for restart */
	while (!CPU_ISSET(cpu, &started_cpus))
		cpu_spinwait();

	CPU_CLR_ATOMIC(cpu, &started_cpus);
	CPU_CLR_ATOMIC(cpu, &stopped_cpus);
#ifdef DDB
	dbg_resume_dbreg();
#endif
	CTR0(KTR_SMP, "IPI_STOP (restart)");
}

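/*
 * IPI_PREEMPT handler: temporarily install the IPI trap frame as
 * td_intr_frame so sched_preempt() sees the interrupted context.
 */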
static void
ipi_preempt(void *arg)
{
	struct trapframe *oldframe;
	struct thread *td;

	critical_enter();
	td = curthread;
	td->td_intr_nesting_level++;
	oldframe = td->td_intr_frame;
	td->td_intr_frame = (struct trapframe *)arg;

	CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
	sched_preempt(td);

	td->td_intr_frame = oldframe;
	td->td_intr_nesting_level--;
	critical_exit();
}

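/*
 * IPI_HARDCLOCK handler: deliver the clock tick via hardclockintr() with
 * the IPI trap frame installed as td_intr_frame.
 */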
static void
ipi_hardclock(void *arg)
{
	struct trapframe *oldframe;
	struct thread *td;

	critical_enter();
	td = curthread;
	td->td_intr_nesting_level++;
	oldframe = td->td_intr_frame;
	td->td_intr_frame = (struct trapframe *)arg;

	CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
	hardclockintr();

	td->td_intr_frame = oldframe;
	td->td_intr_nesting_level--;
	critical_exit();
}

#else
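/*
 * Legacy (non-INTRNG) IPI filter: drain pending IPIs from the interrupt
 * controller; the loop below exits once pic_ipi_read() returns 0x3ff.
 */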
static int
ipi_handler(void *arg)
{
	u_int	cpu, ipi;

	cpu = PCPU_GET(cpuid);

	ipi = pic_ipi_read((int)arg);

	while (ipi != 0x3ff) {
		switch (ipi) {
		case IPI_RENDEZVOUS:
			CTR0(KTR_SMP, "IPI_RENDEZVOUS");
			smp_rendezvous_action();
			break;

		case IPI_AST:
			CTR0(KTR_SMP, "IPI_AST");
			break;

		case IPI_STOP:
			/*
			 * IPI_STOP_HARD is mapped to IPI_STOP so it is not
			 * necessary to add it in the switch.
			 */
			CTR0(KTR_SMP, "IPI_STOP or IPI_STOP_HARD");

			savectx(&stoppcbs[cpu]);

			/*
			 * CPUs are stopped when entering the debugger and at
			 * system shutdown, both of which can precede a
			 * panic dump.  For the dump to be correct, all caches
			 * must be flushed and invalidated, but on ARM there's
			 * no way to broadcast a wbinv_all to other cores.
			 * Instead, we have each core do the local wbinv_all as
			 * part of stopping the core.  The core requesting the
			 * stop will do the L2 cache flush after all other cores
			 * have done their L1 flushes and stopped.
			 */
			cpu_idcache_wbinv_all();

			/* Indicate we are stopped */
			CPU_SET_ATOMIC(cpu, &stopped_cpus);

			/* Wait for restart */
			while (!CPU_ISSET(cpu, &started_cpus))
				cpu_spinwait();

			CPU_CLR_ATOMIC(cpu, &started_cpus);
			CPU_CLR_ATOMIC(cpu, &stopped_cpus);
#ifdef DDB
			dbg_resume_dbreg();
#endif
			CTR0(KTR_SMP, "IPI_STOP (restart)");
			break;
		case IPI_PREEMPT:
			CTR1(KTR_SMP, "%s: IPI_PREEMPT", __func__);
			sched_preempt(curthread);
			break;
		case IPI_HARDCLOCK:
			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
			hardclockintr();
			break;
		default:
			panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu);
		}

		pic_ipi_clear(ipi);
		ipi = pic_ipi_read(-1);
	}

	return (FILTER_HANDLED);
}
#endif

static void
release_aps(void *dummy __unused)
{
	uint32_t loop_counter;
#ifndef ARM_INTRNG
	int start = 0, end = 0;
#endif

	if (mp_ncpus == 1)
		return;

#ifdef ARM_INTRNG
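	/* Register a handler for each IPI with the INTRNG framework. */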
	intr_ipi_set_handler(IPI_RENDEZVOUS, "rendezvous", ipi_rendezvous, NULL, 0);
	intr_ipi_set_handler(IPI_AST, "ast", ipi_ast, NULL, 0);
	intr_ipi_set_handler(IPI_STOP, "stop", ipi_stop, NULL, 0);
	intr_ipi_set_handler(IPI_PREEMPT, "preempt", ipi_preempt, NULL, 0);
	intr_ipi_set_handler(IPI_HARDCLOCK, "hardclock", ipi_hardclock, NULL, 0);

#else
#ifdef IPI_IRQ_START
	start = IPI_IRQ_START;
#ifdef IPI_IRQ_END
	end = IPI_IRQ_END;
#else
	end = IPI_IRQ_START;
#endif
#endif

	for (int i = start; i <= end; i++) {
		/*
		 * Register the IPI handler.  Pass the IRQ number as the
		 * handler argument; if NULL were passed, the interrupt
		 * code would hand the handler the trap frame pointer
		 * instead.
		 */
		arm_setup_irqhandler("ipi", ipi_handler, NULL, (void *)i, i,
		    INTR_TYPE_MISC | INTR_EXCL, NULL);

		/* Enable the IPI interrupt. */
		arm_unmask_irq(i);
	}
#endif
	atomic_store_rel_int(&aps_ready, 1);
	/* Wake the other threads up. */
#if __ARM_ARCH >= 7
	armv7_sev();
#endif

	printf("Release APs\n");

	/* Give the APs up to two seconds to reach smp_started. */
	for (loop_counter = 0; loop_counter < 2000; loop_counter++) {
		if (smp_started)
			return;
		DELAY(1000);
	}
	printf("AP's not started\n");
}

SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);

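/*
 * Build the scheduler topology: a single level in which all CPUs are
 * assumed to share one L2 cache.
 */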
struct cpu_group *
cpu_topo(void)
{

	return (smp_topo_1level(CG_SHARE_L2, mp_ncpus, 0));
}

void
cpu_mp_setmaxid(void)
{

	platform_mp_setmaxid();
}

/* IPI send routines; all delegate to platform_ipi_send(). */
void
ipi_all_but_self(u_int ipi)
{
	cpuset_t other_cpus;

	other_cpus = all_cpus;
	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	platform_ipi_send(other_cpus, ipi);
}

void
ipi_cpu(int cpu, u_int ipi)
{
	cpuset_t cpus;

	CPU_ZERO(&cpus);
	CPU_SET(cpu, &cpus);

	CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x", __func__, cpu, ipi);
	platform_ipi_send(cpus, ipi);
}

void
ipi_selected(cpuset_t cpus, u_int ipi)
{

	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
	platform_ipi_send(cpus, ipi);
}