/*-
 * Copyright (c) 2001, John Baldwin <jhb@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module holds the global variables and machine independent functions
 * used for the kernel SMP support.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/kern/subr_smp.c 209059 2010-06-11 18:46:34Z jhb $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/smp.h>

#include "opt_sched.h"

#ifdef SMP
volatile cpumask_t stopped_cpus;
volatile cpumask_t started_cpus;
cpumask_t idle_cpus_mask;
cpumask_t hlt_cpus_mask;
cpumask_t logical_cpus_mask;

void (*cpustop_restartfunc)(void);
#endif
/* This is used in modules that need to work in both SMP and UP. */
cpumask_t all_cpus;

int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;

volatile int smp_started;
u_int mp_maxid;

SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD, NULL, "Kernel SMP");

SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD, &mp_maxid, 0,
    "Max CPU ID.");

SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD, &mp_maxcpus, 0,
    "Max number of CPUs that the system was compiled for.");
int smp_active = 0;	/* are the APs allowed to run? */
SYSCTL_INT(_kern_smp, OID_AUTO, active, CTLFLAG_RW, &smp_active, 0,
    "Number of Auxiliary Processors (APs) that were successfully started");

int smp_disabled = 0;	/* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN, &smp_disabled, 0,
    "SMP has been disabled from the loader");
TUNABLE_INT("kern.smp.disabled", &smp_disabled);

int smp_cpus = 1;	/* how many CPUs are running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD, &smp_cpus, 0,
    "Number of CPUs online");

int smp_topology = 0;	/* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RD, &smp_topology, 0,
    "Topology override setting; 0 is default provided by hardware.");
TUNABLE_INT("kern.smp.topology", &smp_topology);

#ifdef SMP
/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
	   &forward_signal_enabled, 0,
	   "Forwarding of a signal to a process on a different CPU");

/* Variables needed for SMP rendezvous. */
static volatile int smp_rv_ncpus;
static void (*volatile smp_rv_setup_func)(void *arg);
static void (*volatile smp_rv_action_func)(void *arg);
static void (*volatile smp_rv_teardown_func)(void *arg);
static void *volatile smp_rv_func_arg;
static volatile int smp_rv_waiters[3];

/*
 * Shared mutex to restrict busywaits between smp_rendezvous() and
 * smp(_targeted)_tlb_shootdown().  A deadlock occurs if both of these
 * functions trigger at once and cause multiple CPUs to busywait with
 * interrupts disabled.
 */
struct mtx smp_ipi_mtx;

/*
 * Let the MD SMP code initialize mp_maxid very early if it can.
 */
static void
mp_setmaxid(void *dummy)
{
	cpu_mp_setmaxid();
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);

/*
 * Call the MD SMP initialization code.
 */
static void
mp_start(void *dummy)
{

	mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);

	/* Probe for MP hardware. */
	if (smp_disabled != 0 || cpu_mp_probe() == 0) {
		mp_ncpus = 1;
		all_cpus = PCPU_GET(cpumask);
		return;
	}

	cpu_mp_start();
	printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
	    mp_ncpus);
	cpu_mp_announce();
}
SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);

void
forward_signal(struct thread *td)
{
	int id;

	/*
	 * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHK on
	 * this thread, so all we need to do is poke it if it is currently
	 * executing so that it executes ast().
	 */
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_RUNNING(td),
	    ("forward_signal: thread is not TDS_RUNNING"));

	CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);

	if (!smp_started || cold || panicstr)
		return;
	if (!forward_signal_enabled)
		return;

	/* No need to IPI ourself. */
	if (td == curthread)
		return;

	id = td->td_oncpu;
	if (id == NOCPU)
		return;
	ipi_selected(1 << id, IPI_AST);
}
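
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * caller such as signotify() marks the thread first and then pokes the
 * remote CPU while holding the thread lock, roughly:
 *
 *	thread_lock(td);
 *	td->td_flags |= TDF_ASTPENDING | TDF_NEEDSIGCHK;
 *	if (TD_IS_RUNNING(td))
 *		forward_signal(td);
 *	thread_unlock(td);
 */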

/*
 * When called, the executing CPU sends a stop IPI to the CPUs named in
 * 'map' and waits for each of them to acknowledge that it has halted.
 *
 * Usually (but not necessarily) called with 'other_cpus' as its arg.
 *
 *  - Signals all CPUs in map to stop.
 *  - Waits for each to stop.
 *
 * Returns:
 *   0: SMP not started yet, nothing done
 *   1: ok
 *
 * XXX FIXME: this is not MP-safe; it needs a lock to prevent multiple
 *            CPUs from executing it at the same time.
 */
static int
generic_stop_cpus(cpumask_t map, u_int type)
{
	int i;

	KASSERT(type == IPI_STOP || type == IPI_STOP_HARD,
	    ("%s: invalid stop type", __func__));

	if (!smp_started)
		return (0);

	CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type);

	/* send the stop IPI to all CPUs in map */
	ipi_selected(map, type);

	i = 0;
	while ((stopped_cpus & map) != map) {
		/* spin */
		cpu_spinwait();
		i++;
#ifdef DIAGNOSTIC
		if (i == 100000) {
			printf("timeout stopping cpus\n");
			break;
		}
#endif
	}

	return (1);
}

int
stop_cpus(cpumask_t map)
{

	return (generic_stop_cpus(map, IPI_STOP));
}

int
stop_cpus_hard(cpumask_t map)
{

	return (generic_stop_cpus(map, IPI_STOP_HARD));
}

#if defined(__amd64__)
/*
 * When called, the executing CPU sends a suspend IPI to the CPUs named
 * in 'map' and waits for each of them to acknowledge the request.
 *
 * Usually (but not necessarily) called with 'other_cpus' as its arg.
 *
 *  - Signals all CPUs in map to suspend.
 *  - Waits for each to suspend.
 *
 * Returns:
 *   0: SMP not started yet, nothing done
 *   1: ok
 *
 * XXX FIXME: this is not MP-safe; it needs a lock to prevent multiple
 *            CPUs from executing it at the same time.
 */
int
suspend_cpus(cpumask_t map)
{
	int i;

	if (!smp_started)
		return (0);

	CTR1(KTR_SMP, "suspend_cpus(%x)", map);

	/* send the suspend IPI to all CPUs in map */
	ipi_selected(map, IPI_SUSPEND);

	i = 0;
	while ((stopped_cpus & map) != map) {
		/* spin */
		cpu_spinwait();
		i++;
#ifdef DIAGNOSTIC
		if (i == 100000) {
			printf("timeout suspending cpus\n");
			break;
		}
#endif
	}

	return (1);
}
#endif

/*
 * Called by a CPU to restart stopped CPUs.
 *
 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
 *
 *  - Signals all CPUs in map to restart.
 *  - Waits for each to restart.
 *
 * Returns:
 *   0: SMP not started yet, nothing done
 *   1: ok
 */
int
restart_cpus(cpumask_t map)
{

	if (!smp_started)
		return (0);

	CTR1(KTR_SMP, "restart_cpus(%x)", map);

	/* signal other cpus to restart */
	atomic_store_rel_int(&started_cpus, map);

	/* wait for each to clear its bit */
	while ((stopped_cpus & map) != 0)
		cpu_spinwait();

	return (1);
}
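
/*
 * Illustrative sketch (hypothetical caller, not part of this file): a
 * typical pairing stops every other CPU around a delicate operation and
 * then releases them, e.g.:
 *
 *	cpumask_t map = PCPU_GET(other_cpus);
 *
 *	stop_cpus(map);
 *	... work while the other CPUs spin in the MD IPI_STOP handler ...
 *	restart_cpus(stopped_cpus);
 */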

/*
 * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
 * (if specified), rendezvous, execute the action function (if specified),
 * rendezvous again, execute the teardown function (if specified), and then
 * resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
void
smp_rendezvous_action(void)
{
	void *local_func_arg = smp_rv_func_arg;
	void (*local_setup_func)(void *) = smp_rv_setup_func;
	void (*local_action_func)(void *) = smp_rv_action_func;
	void (*local_teardown_func)(void *) = smp_rv_teardown_func;

	/* Ensure we have up-to-date values. */
	atomic_add_acq_int(&smp_rv_waiters[0], 1);
	while (smp_rv_waiters[0] < smp_rv_ncpus)
		cpu_spinwait();

	/* setup function (use the cached local copies throughout) */
	if (local_setup_func != smp_no_rendevous_barrier) {
		if (local_setup_func != NULL)
			local_setup_func(local_func_arg);

		/* spin on entry rendezvous */
		atomic_add_int(&smp_rv_waiters[1], 1);
		while (smp_rv_waiters[1] < smp_rv_ncpus)
			cpu_spinwait();
	}

	/* action function */
	if (local_action_func != NULL)
		local_action_func(local_func_arg);

	/* spin on exit rendezvous */
	atomic_add_int(&smp_rv_waiters[2], 1);
	if (local_teardown_func == smp_no_rendevous_barrier)
		return;
	while (smp_rv_waiters[2] < smp_rv_ncpus)
		cpu_spinwait();

	/* teardown function */
	if (local_teardown_func != NULL)
		local_teardown_func(local_func_arg);
}

void
smp_rendezvous_cpus(cpumask_t map,
	void (* setup_func)(void *),
	void (* action_func)(void *),
	void (* teardown_func)(void *),
	void *arg)
{
	int i, ncpus = 0;

	if (!smp_started) {
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		return;
	}

	CPU_FOREACH(i) {
		if (((1 << i) & map) != 0)
			ncpus++;
	}
	if (ncpus == 0)
		panic("ncpus is 0 with map=0x%x", map);

	/* obtain rendezvous lock */
	mtx_lock_spin(&smp_ipi_mtx);

	/* set static function pointers */
	smp_rv_ncpus = ncpus;
	smp_rv_setup_func = setup_func;
	smp_rv_action_func = action_func;
	smp_rv_teardown_func = teardown_func;
	smp_rv_func_arg = arg;
	smp_rv_waiters[1] = 0;
	smp_rv_waiters[2] = 0;
	atomic_store_rel_int(&smp_rv_waiters[0], 0);

	/* signal other processors, which will enter the IPI with interrupts off */
	ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS);

	/* Check if the current CPU is in the map */
	if ((map & (1 << curcpu)) != 0)
		smp_rendezvous_action();

	if (teardown_func == smp_no_rendevous_barrier)
		while (atomic_load_acq_int(&smp_rv_waiters[2]) < ncpus)
			cpu_spinwait();

	/* release lock */
	mtx_unlock_spin(&smp_ipi_mtx);
}

void
smp_rendezvous(void (* setup_func)(void *),
	       void (* action_func)(void *),
	       void (* teardown_func)(void *),
	       void *arg)
{
	smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
}
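
/*
 * Illustrative sketch (hypothetical consumer, not part of this file):
 * run an action on every CPU with the default barriers.  MSR_FOO and
 * the wrmsr() usage are assumptions for the example only:
 *
 *	static void
 *	set_msr_action(void *arg)
 *	{
 *		wrmsr(MSR_FOO, *(uint64_t *)arg);
 *	}
 *
 *	uint64_t newval = 42;
 *	smp_rendezvous(NULL, set_msr_action, NULL, &newval);
 *
 * Passing smp_no_rendevous_barrier as setup_func or teardown_func skips
 * the corresponding barrier rather than spinning on it.
 */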

static struct cpu_group group[MAXCPU];

struct cpu_group *
smp_topo(void)
{
	struct cpu_group *top;

	/*
	 * Check for a fake topology request for debugging purposes.
	 */
	switch (smp_topology) {
	case 1:
		/* Dual core with no sharing.  */
		top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
		break;
	case 2:
		/* No topology, all cpus are equal. */
		top = smp_topo_none();
		break;
	case 3:
		/* Dual core with shared L2.  */
		top = smp_topo_1level(CG_SHARE_L2, 2, 0);
		break;
	case 4:
		/* Quad core, shared L3 among each package, private L2.  */
		top = smp_topo_1level(CG_SHARE_L3, 4, 0);
		break;
	case 5:
		/* Quad core, 2 dualcore parts on each package share L2.  */
		top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
		break;
	case 6:
		/* Single-core 2xHTT */
		top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
		break;
	case 7:
		/* Quad core with a shared L3, 8 threads sharing L2.  */
		top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
		    CG_FLAG_SMT);
		break;
	default:
		/* Default, ask the system what it wants. */
		top = cpu_topo();
		break;
	}
	/*
	 * Verify the returned topology.
	 */
	if (top->cg_count != mp_ncpus)
		panic("Built bad topology at %p.  CPU count %d != %d",
		    top, top->cg_count, mp_ncpus);
	if (top->cg_mask != all_cpus)
		panic("Built bad topology at %p.  CPU mask 0x%X != 0x%X",
		    top, top->cg_mask, all_cpus);
	return (top);
}
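
/*
 * The fake topologies above are selected with the kern.smp.topology
 * tunable, e.g. in /boot/loader.conf (illustrative):
 *
 *	kern.smp.topology="3"	# pretend dual core CPUs share an L2
 */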

struct cpu_group *
smp_topo_none(void)
{
	struct cpu_group *top;

	top = &group[0];
	top->cg_parent = NULL;
	top->cg_child = NULL;
	if (mp_ncpus == sizeof(top->cg_mask) * 8)
		top->cg_mask = -1;
	else
		top->cg_mask = (1 << mp_ncpus) - 1;
	top->cg_count = mp_ncpus;
	top->cg_children = 0;
	top->cg_level = CG_SHARE_NONE;
	top->cg_flags = 0;

	return (top);
}

static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
    int count, int flags, int start)
{
	cpumask_t mask;
	int i;

	for (mask = 0, i = 0; i < count; i++, start++)
		mask |= (1 << start);
	child->cg_parent = parent;
	child->cg_child = NULL;
	child->cg_children = 0;
	child->cg_level = share;
	child->cg_count = count;
	child->cg_flags = flags;
	child->cg_mask = mask;
	parent->cg_children++;
	for (; parent != NULL; parent = parent->cg_parent) {
		if ((parent->cg_mask & child->cg_mask) != 0)
			panic("Duplicate children in %p.  mask 0x%X child 0x%X",
			    parent, parent->cg_mask, child->cg_mask);
		parent->cg_mask |= child->cg_mask;
		parent->cg_count += child->cg_count;
	}

	return (start);
}

struct cpu_group *
smp_topo_1level(int share, int count, int flags)
{
	struct cpu_group *child;
	struct cpu_group *top;
	int packages;
	int cpu;
	int i;

	cpu = 0;
	top = &group[0];
	packages = mp_ncpus / count;
	top->cg_child = child = &group[1];
	top->cg_level = CG_SHARE_NONE;
	for (i = 0; i < packages; i++, child++)
		cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
	return (top);
}

struct cpu_group *
smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
    int l1flags)
{
	struct cpu_group *top;
	struct cpu_group *l1g;
	struct cpu_group *l2g;
	int cpu;
	int i;
	int j;

	cpu = 0;
	top = &group[0];
	l2g = &group[1];
	top->cg_child = l2g;
	top->cg_level = CG_SHARE_NONE;
	top->cg_children = mp_ncpus / (l2count * l1count);
	l1g = l2g + top->cg_children;
	for (i = 0; i < top->cg_children; i++, l2g++) {
		l2g->cg_parent = top;
		l2g->cg_child = l1g;
		l2g->cg_level = l2share;
		for (j = 0; j < l2count; j++, l1g++)
			cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
			    l1flags, cpu);
	}
	return (top);
}

struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
	struct cpu_group *cg;
	cpumask_t mask;
	int children;
	int i;

	mask = (1 << cpu);
	cg = top;
	for (;;) {
		if ((cg->cg_mask & mask) == 0)
			return (NULL);
		if (cg->cg_children == 0)
			return (cg);
		children = cg->cg_children;
		for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
			if ((cg->cg_mask & mask) != 0)
				break;
	}
	return (NULL);
}
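
/*
 * Illustrative sketch (hypothetical consumer, not part of this file):
 * a scheduler can start from a CPU's leaf group and walk toward the
 * root to consider increasingly distant CPUs, e.g.:
 *
 *	struct cpu_group *cg;
 *
 *	for (cg = smp_topo_find(smp_topo(), cpu); cg != NULL;
 *	    cg = cg->cg_parent)
 *		... scan cg->cg_mask for candidate CPUs ...
 */
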
#else /* !SMP */

void
smp_rendezvous_cpus(cpumask_t map,
	void (*setup_func)(void *),
	void (*action_func)(void *),
	void (*teardown_func)(void *),
	void *arg)
{
	if (setup_func != NULL)
		setup_func(arg);
	if (action_func != NULL)
		action_func(arg);
	if (teardown_func != NULL)
		teardown_func(arg);
}

void
smp_rendezvous(void (*setup_func)(void *),
	       void (*action_func)(void *),
	       void (*teardown_func)(void *),
	       void *arg)
{

	if (setup_func != NULL)
		setup_func(arg);
	if (action_func != NULL)
		action_func(arg);
	if (teardown_func != NULL)
		teardown_func(arg);
}

/*
 * Provide dummy SMP support for UP kernels.  Modules that need to use SMP
 * APIs will still work using this dummy support.
 */
static void
mp_setvariables_for_up(void *dummy)
{
	mp_ncpus = 1;
	mp_maxid = PCPU_GET(cpuid);
	all_cpus = PCPU_GET(cpumask);
	KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
}
SYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
    mp_setvariables_for_up, NULL);
#endif /* SMP */

void
smp_no_rendevous_barrier(void *dummy)
{
#ifdef SMP
	KASSERT(!smp_started, ("smp_no_rendevous called and smp is started"));
#endif
}