1139804Simp/*-
2184601Sjhb * Copyright (c) 2001, John Baldwin <jhb@FreeBSD.org>.
3184601Sjhb * All rights reserved.
425164Speter *
525164Speter * Redistribution and use in source and binary forms, with or without
625164Speter * modification, are permitted provided that the following conditions
725164Speter * are met:
825164Speter * 1. Redistributions of source code must retain the above copyright
925164Speter *    notice, this list of conditions and the following disclaimer.
1076078Sjhb * 2. Redistributions in binary form must reproduce the above copyright
1176078Sjhb *    notice, this list of conditions and the following disclaimer in the
1276078Sjhb *    documentation and/or other materials provided with the distribution.
1325164Speter *
14184601Sjhb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1525164Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1625164Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17184601Sjhb * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18184601Sjhb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19184601Sjhb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20184601Sjhb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21184601Sjhb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22184601Sjhb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23184601Sjhb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24184601Sjhb * SUCH DAMAGE.
2525164Speter */
2625164Speter
2776078Sjhb/*
2876078Sjhb * This module holds the global variables and machine independent functions
2976440Sjhb * used for the kernel SMP support.
3076078Sjhb */
3125164Speter
32116182Sobrien#include <sys/cdefs.h>
33116182Sobrien__FBSDID("$FreeBSD$");
34116182Sobrien
3528743Sbde#include <sys/param.h>
3625164Speter#include <sys/systm.h>
3776440Sjhb#include <sys/kernel.h>
3876078Sjhb#include <sys/ktr.h>
3928808Speter#include <sys/proc.h>
40126763Snjl#include <sys/bus.h>
4176078Sjhb#include <sys/lock.h>
4267365Sjhb#include <sys/mutex.h>
4376440Sjhb#include <sys/pcpu.h>
4476078Sjhb#include <sys/smp.h>
4576078Sjhb#include <sys/sysctl.h>
4625164Speter
47171191Sjhb#include <machine/cpu.h>
4891778Sjake#include <machine/smp.h>
4991778Sjake
50134591Sjulian#include "opt_sched.h"
51134591Sjulian
52123125Sjhb#ifdef SMP
53222813Sattiliovolatile cpuset_t stopped_cpus;
54222813Sattiliovolatile cpuset_t started_cpus;
55222813Sattiliocpuset_t hlt_cpus_mask;
56222813Sattiliocpuset_t logical_cpus_mask;
5725164Speter
5892723Salfredvoid (*cpustop_restartfunc)(void);
59123125Sjhb#endif
60134688Sjulian/* This is used in modules that need to work in both SMP and UP. */
61222813Sattiliocpuset_t all_cpus;
62123125Sjhb
6376078Sjhbint mp_ncpus;
64123766Salfred/* export this for libkvm consumers. */
65123766Salfredint mp_maxcpus = MAXCPU;
6628027Sfsmp
6785787Smarcelvolatile int smp_started;
6891673Sjeffu_int mp_maxid;
6925164Speter
70248085Smariusstatic SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL,
71248085Smarius    "Kernel SMP");
7225164Speter
73224159SrwatsonSYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
74179230Sjb    "Max CPU ID.");
75179230Sjb
76224159SrwatsonSYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
77224159Srwatson    0, "Max number of CPUs that the system was compiled for.");
78123766Salfred
7976078Sjhbint smp_active = 0;	/* are the APs allowed to run? */
80116270SdesSYSCTL_INT(_kern_smp, OID_AUTO, active, CTLFLAG_RW, &smp_active, 0,
81116270Sdes    "Number of Auxillary Processors (APs) that were successfully started");
8226155Sfsmp
83108371Sjakeint smp_disabled = 0;	/* has smp been disabled? */
84224159SrwatsonSYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
85224159Srwatson    &smp_disabled, 0, "SMP has been disabled from the loader");
86108371SjakeTUNABLE_INT("kern.smp.disabled", &smp_disabled);
87108371Sjake
8876078Sjhbint smp_cpus = 1;	/* how many cpu's running */
89224159SrwatsonSYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
90116270Sdes    "Number of CPUs online");
9127005Sfsmp
92176734Sjeffint smp_topology = 0;	/* Which topology we're using. */
93176734SjeffSYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RD, &smp_topology, 0,
94176734Sjeff    "Topology override setting; 0 is default provided by hardware.");
95176734SjeffTUNABLE_INT("kern.smp.topology", &smp_topology);
96176734Sjeff
97123125Sjhb#ifdef SMP
9871525Sjhb/* Enable forwarding of a signal to a process running on a different CPU */
9971525Sjhbstatic int forward_signal_enabled = 1;
10076078SjhbSYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
101116270Sdes	   &forward_signal_enabled, 0,
102116270Sdes	   "Forwarding of a signal to a process on a different CPU");
10371525Sjhb
10476078Sjhb/* Variables needed for SMP rendezvous. */
105182292Sjhbstatic volatile int smp_rv_ncpus;
106173444Supsstatic void (*volatile smp_rv_setup_func)(void *arg);
107173444Supsstatic void (*volatile smp_rv_action_func)(void *arg);
108175057Sjhbstatic void (*volatile smp_rv_teardown_func)(void *arg);
109187719Sjhbstatic void *volatile smp_rv_func_arg;
110224527Savgstatic volatile int smp_rv_waiters[4];
11171525Sjhb
112134227Speter/*
113134227Speter * Shared mutex to restrict busywaits between smp_rendezvous() and
114134227Speter * smp(_targeted)_tlb_shootdown().  A deadlock occurs if both of these
115134227Speter * functions trigger at once and cause multiple CPUs to busywait with
116134227Speter * interrupts disabled.
117134227Speter */
118134416Sobrienstruct mtx smp_ipi_mtx;
119134227Speter
/*
 * SYSINIT hook (SI_SUB_TUNABLES, first in order) that hands control to
 * the MD cpu_mp_setmaxid() so mp_maxid can be set up very early if the
 * platform is able to.  The 'dummy' argument is unused.
 */
static void
mp_setmaxid(void *dummy)
{

	cpu_mp_setmaxid();
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
12991673Sjeff
13091673Sjeff/*
13191673Sjeff * Call the MD SMP initialization code.
13291673Sjeff */
13391673Sjeffstatic void
13476078Sjhbmp_start(void *dummy)
13571525Sjhb{
13671525Sjhb
137207921Sattilio	mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);
138207921Sattilio
13976078Sjhb	/* Probe for MP hardware. */
140122947Sjhb	if (smp_disabled != 0 || cpu_mp_probe() == 0) {
141121756Sjhb		mp_ncpus = 1;
142223758Sattilio		CPU_SETOF(PCPU_GET(cpuid), &all_cpus);
14376078Sjhb		return;
144121756Sjhb	}
14576078Sjhb
14676078Sjhb	cpu_mp_start();
14776078Sjhb	printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
14876078Sjhb	    mp_ncpus);
14976078Sjhb	cpu_mp_announce();
15071525Sjhb}
151177253SrwatsonSYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);
15271525Sjhb
15325164Spetervoid
15483366Sjulianforward_signal(struct thread *td)
15525164Speter{
15676078Sjhb	int id;
15725164Speter
15826108Sfsmp	/*
159112888Sjeff	 * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on
160112888Sjeff	 * this thread, so all we need to do is poke it if it is currently
16193873Sbde	 * executing so that it executes ast().
16226108Sfsmp	 */
163170307Sjeff	THREAD_LOCK_ASSERT(td, MA_OWNED);
164103216Sjulian	KASSERT(TD_IS_RUNNING(td),
16599072Sjulian	    ("forward_signal: thread is not TDS_RUNNING"));
16631639Sfsmp
16783366Sjulian	CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);
16865557Sjasone
16976078Sjhb	if (!smp_started || cold || panicstr)
17031639Sfsmp		return;
17176078Sjhb	if (!forward_signal_enabled)
17276078Sjhb		return;
17331639Sfsmp
17476078Sjhb	/* No need to IPI ourself. */
17583366Sjulian	if (td == curthread)
17631639Sfsmp		return;
17731639Sfsmp
178113339Sjulian	id = td->td_oncpu;
17976078Sjhb	if (id == NOCPU)
18034020Stegge		return;
181210939Sjhb	ipi_cpu(id, IPI_AST);
18234020Stegge}
18334021Stegge
18471525Sjhb/*
18571525Sjhb * When called the executing CPU will send an IPI to all other CPUs
18671525Sjhb *  requesting that they halt execution.
18771525Sjhb *
18871525Sjhb * Usually (but not necessarily) called with 'other_cpus' as its arg.
18971525Sjhb *
19071525Sjhb *  - Signals all CPUs in map to stop.
19171525Sjhb *  - Waits for each to stop.
19271525Sjhb *
19371525Sjhb * Returns:
19471525Sjhb *  -1: error
19571525Sjhb *   0: NA
19671525Sjhb *   1: ok
19771525Sjhb *
19871525Sjhb */
199196196Sattiliostatic int
200222813Sattiliogeneric_stop_cpus(cpuset_t map, u_int type)
20171525Sjhb{
202222813Sattilio#ifdef KTR
203222813Sattilio	char cpusetbuf[CPUSETBUFSIZ];
204222813Sattilio#endif
205213736Savg	static volatile u_int stopping_cpu = NOCPU;
20676078Sjhb	int i;
20736135Stegge
208213736Savg	KASSERT(
209235796Siwasaki#if defined(__amd64__) || defined(__i386__)
210213736Savg	    type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
211213736Savg#else
212213736Savg	    type == IPI_STOP || type == IPI_STOP_HARD,
213213736Savg#endif
214196196Sattilio	    ("%s: invalid stop type", __func__));
215196196Sattilio
21671525Sjhb	if (!smp_started)
217213736Savg		return (0);
21871525Sjhb
219222813Sattilio	CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
220222813Sattilio	    cpusetobj_strprint(cpusetbuf, &map), type);
22176078Sjhb
222213736Savg	if (stopping_cpu != PCPU_GET(cpuid))
223213736Savg		while (atomic_cmpset_int(&stopping_cpu, NOCPU,
224213736Savg		    PCPU_GET(cpuid)) == 0)
225213736Savg			while (stopping_cpu != NOCPU)
226213736Savg				cpu_spinwait(); /* spin */
227213736Savg
22876078Sjhb	/* send the stop IPI to all CPUs in map */
229196196Sattilio	ipi_selected(map, type);
230127498Smarcel
23176078Sjhb	i = 0;
232222813Sattilio	while (!CPU_SUBSET(&stopped_cpus, &map)) {
23376078Sjhb		/* spin */
234171191Sjhb		cpu_spinwait();
23576078Sjhb		i++;
236223530Savg		if (i == 100000000) {
23776078Sjhb			printf("timeout stopping cpus\n");
23876078Sjhb			break;
23976078Sjhb		}
24076078Sjhb	}
24171525Sjhb
242213736Savg	stopping_cpu = NOCPU;
243213736Savg	return (1);
24471525Sjhb}
24571525Sjhb
246196196Sattilioint
247222813Sattiliostop_cpus(cpuset_t map)
248196196Sattilio{
249196196Sattilio
250196196Sattilio	return (generic_stop_cpus(map, IPI_STOP));
251196196Sattilio}
252196196Sattilio
253196196Sattilioint
254222813Sattiliostop_cpus_hard(cpuset_t map)
255196196Sattilio{
256196196Sattilio
257196196Sattilio	return (generic_stop_cpus(map, IPI_STOP_HARD));
258196196Sattilio}
259196196Sattilio
260235796Siwasaki#if defined(__amd64__) || defined(__i386__)
261189903Sjkimint
262222813Sattiliosuspend_cpus(cpuset_t map)
263189903Sjkim{
264189903Sjkim
265213736Savg	return (generic_stop_cpus(map, IPI_SUSPEND));
266189903Sjkim}
267189903Sjkim#endif
268189903Sjkim
269189903Sjkim/*
27071525Sjhb * Called by a CPU to restart stopped CPUs.
27171525Sjhb *
27271525Sjhb * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
27371525Sjhb *
27471525Sjhb *  - Signals all CPUs in map to restart.
27571525Sjhb *  - Waits for each to restart.
27671525Sjhb *
27771525Sjhb * Returns:
27871525Sjhb *  -1: error
27971525Sjhb *   0: NA
28071525Sjhb *   1: ok
28171525Sjhb */
28271525Sjhbint
283222813Sattiliorestart_cpus(cpuset_t map)
28471525Sjhb{
285222813Sattilio#ifdef KTR
286222813Sattilio	char cpusetbuf[CPUSETBUFSIZ];
287222813Sattilio#endif
28871525Sjhb
28971525Sjhb	if (!smp_started)
29071525Sjhb		return 0;
29171525Sjhb
292222813Sattilio	CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
29371525Sjhb
29476078Sjhb	/* signal other cpus to restart */
295222813Sattilio	CPU_COPY_STORE_REL(&map, &started_cpus);
29676078Sjhb
29771525Sjhb	/* wait for each to clear its bit */
298222813Sattilio	while (CPU_OVERLAP(&stopped_cpus, &map))
299171191Sjhb		cpu_spinwait();
30071525Sjhb
30171525Sjhb	return 1;
30271525Sjhb}
30371525Sjhb
30434021Stegge/*
30548924Smsmith * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
30648924Smsmith * (if specified), rendezvous, execute the action function (if specified),
30748924Smsmith * rendezvous again, execute the teardown function (if specified), and then
30848924Smsmith * resume.
30948924Smsmith *
31048924Smsmith * Note that the supplied external functions _must_ be reentrant and aware
31148924Smsmith * that they are running in parallel and in an unknown lock context.
31248924Smsmith */
31348924Smsmithvoid
31448924Smsmithsmp_rendezvous_action(void)
31548924Smsmith{
316222254Sjhb	struct thread *td;
317222032Sjhb	void *local_func_arg;
318222032Sjhb	void (*local_setup_func)(void*);
319222032Sjhb	void (*local_action_func)(void*);
320222032Sjhb	void (*local_teardown_func)(void*);
321222254Sjhb#ifdef INVARIANTS
322222254Sjhb	int owepreempt;
323222254Sjhb#endif
324175057Sjhb
325171191Sjhb	/* Ensure we have up-to-date values. */
326171191Sjhb	atomic_add_acq_int(&smp_rv_waiters[0], 1);
327182292Sjhb	while (smp_rv_waiters[0] < smp_rv_ncpus)
328171191Sjhb		cpu_spinwait();
329171191Sjhb
330222032Sjhb	/* Fetch rendezvous parameters after acquire barrier. */
331222032Sjhb	local_func_arg = smp_rv_func_arg;
332222032Sjhb	local_setup_func = smp_rv_setup_func;
333222032Sjhb	local_action_func = smp_rv_action_func;
334222032Sjhb	local_teardown_func = smp_rv_teardown_func;
335222032Sjhb
336222032Sjhb	/*
337222254Sjhb	 * Use a nested critical section to prevent any preemptions
338222254Sjhb	 * from occurring during a rendezvous action routine.
339222254Sjhb	 * Specifically, if a rendezvous handler is invoked via an IPI
340222254Sjhb	 * and the interrupted thread was in the critical_exit()
341222254Sjhb	 * function after setting td_critnest to 0 but before
342222254Sjhb	 * performing a deferred preemption, this routine can be
343222254Sjhb	 * invoked with td_critnest set to 0 and td_owepreempt true.
344222254Sjhb	 * In that case, a critical_exit() during the rendezvous
345222254Sjhb	 * action would trigger a preemption which is not permitted in
346222254Sjhb	 * a rendezvous action.  To fix this, wrap all of the
347222254Sjhb	 * rendezvous action handlers in a critical section.  We
348222254Sjhb	 * cannot use a regular critical section however as having
349222254Sjhb	 * critical_exit() preempt from this routine would also be
350222254Sjhb	 * problematic (the preemption must not occur before the IPI
351222266Sjhb	 * has been acknowledged via an EOI).  Instead, we
352222254Sjhb	 * intentionally ignore td_owepreempt when leaving the
353222266Sjhb	 * critical section.  This should be harmless because we do
354222266Sjhb	 * not permit rendezvous action routines to schedule threads,
355222266Sjhb	 * and thus td_owepreempt should never transition from 0 to 1
356222254Sjhb	 * during this routine.
357222254Sjhb	 */
358222254Sjhb	td = curthread;
359222254Sjhb	td->td_critnest++;
360222254Sjhb#ifdef INVARIANTS
361222254Sjhb	owepreempt = td->td_owepreempt;
362222254Sjhb#endif
363222254Sjhb
364222254Sjhb	/*
365222032Sjhb	 * If requested, run a setup function before the main action
366222032Sjhb	 * function.  Ensure all CPUs have completed the setup
367222032Sjhb	 * function before moving on to the action function.
368222032Sjhb	 */
369173444Sups	if (local_setup_func != smp_no_rendevous_barrier) {
370173444Sups		if (smp_rv_setup_func != NULL)
371173444Sups			smp_rv_setup_func(smp_rv_func_arg);
372173444Sups		atomic_add_int(&smp_rv_waiters[1], 1);
373182292Sjhb		while (smp_rv_waiters[1] < smp_rv_ncpus)
374173444Sups                	cpu_spinwait();
375173444Sups	}
376171191Sjhb
377173444Sups	if (local_action_func != NULL)
378173444Sups		local_action_func(local_func_arg);
379171191Sjhb
380222254Sjhb	if (local_teardown_func != smp_no_rendevous_barrier) {
381224527Savg		/*
382224527Savg		 * Signal that the main action has been completed.  If a
383224527Savg		 * full exit rendezvous is requested, then all CPUs will
384224527Savg		 * wait here until all CPUs have finished the main action.
385224527Savg		 */
386224527Savg		atomic_add_int(&smp_rv_waiters[2], 1);
387224527Savg		while (smp_rv_waiters[2] < smp_rv_ncpus)
388222254Sjhb			cpu_spinwait();
389175057Sjhb
390222254Sjhb		if (local_teardown_func != NULL)
391222254Sjhb			local_teardown_func(local_func_arg);
392222254Sjhb	}
393222254Sjhb
394224527Savg	/*
395224527Savg	 * Signal that the rendezvous is fully completed by this CPU.
396224527Savg	 * This means that no member of smp_rv_* pseudo-structure will be
397224527Savg	 * accessed by this target CPU after this point; in particular,
398224527Savg	 * memory pointed by smp_rv_func_arg.
399224527Savg	 */
400224527Savg	atomic_add_int(&smp_rv_waiters[3], 1);
401224527Savg
402222254Sjhb	td->td_critnest--;
403222254Sjhb	KASSERT(owepreempt == td->td_owepreempt,
404222254Sjhb	    ("rendezvous action changed td_owepreempt"));
40548924Smsmith}
40648924Smsmith
40748924Smsmithvoid
408222813Sattiliosmp_rendezvous_cpus(cpuset_t map,
409179230Sjb	void (* setup_func)(void *),
410179230Sjb	void (* action_func)(void *),
411179230Sjb	void (* teardown_func)(void *),
412179230Sjb	void *arg)
41348924Smsmith{
414222813Sattilio	int curcpumap, i, ncpus = 0;
41571576Sjasone
416227888Sattilio	/* Look comments in the !SMP case. */
41776078Sjhb	if (!smp_started) {
418227888Sattilio		spinlock_enter();
41976078Sjhb		if (setup_func != NULL)
42076078Sjhb			setup_func(arg);
42176078Sjhb		if (action_func != NULL)
42276078Sjhb			action_func(arg);
42376078Sjhb		if (teardown_func != NULL)
42476078Sjhb			teardown_func(arg);
425227888Sattilio		spinlock_exit();
42676078Sjhb		return;
42776078Sjhb	}
428179230Sjb
429209059Sjhb	CPU_FOREACH(i) {
430222813Sattilio		if (CPU_ISSET(i, &map))
431179230Sjb			ncpus++;
432209059Sjhb	}
433189232Sdchagin	if (ncpus == 0)
434222813Sattilio		panic("ncpus is 0 with non-zero map");
435182292Sjhb
436134416Sobrien	mtx_lock_spin(&smp_ipi_mtx);
43748924Smsmith
438222032Sjhb	/* Pass rendezvous parameters via global variables. */
439182292Sjhb	smp_rv_ncpus = ncpus;
44048924Smsmith	smp_rv_setup_func = setup_func;
44148924Smsmith	smp_rv_action_func = action_func;
44248924Smsmith	smp_rv_teardown_func = teardown_func;
44348924Smsmith	smp_rv_func_arg = arg;
44448924Smsmith	smp_rv_waiters[1] = 0;
445171191Sjhb	smp_rv_waiters[2] = 0;
446224527Savg	smp_rv_waiters[3] = 0;
447171191Sjhb	atomic_store_rel_int(&smp_rv_waiters[0], 0);
44848924Smsmith
449222032Sjhb	/*
450222032Sjhb	 * Signal other processors, which will enter the IPI with
451222032Sjhb	 * interrupts off.
452222032Sjhb	 */
453222813Sattilio	curcpumap = CPU_ISSET(curcpu, &map);
454222813Sattilio	CPU_CLR(curcpu, &map);
455222813Sattilio	ipi_selected(map, IPI_RENDEZVOUS);
45648924Smsmith
457179230Sjb	/* Check if the current CPU is in the map */
458222813Sattilio	if (curcpumap != 0)
459179230Sjb		smp_rendezvous_action();
46048924Smsmith
461222032Sjhb	/*
462224527Savg	 * Ensure that the master CPU waits for all the other
463224527Savg	 * CPUs to finish the rendezvous, so that smp_rv_*
464224527Savg	 * pseudo-structure and the arg are guaranteed to not
465224527Savg	 * be in use.
466222032Sjhb	 */
467224527Savg	while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus)
468224527Savg		cpu_spinwait();
469175057Sjhb
470134416Sobrien	mtx_unlock_spin(&smp_ipi_mtx);
47148924Smsmith}
472123125Sjhb
473179230Sjbvoid
474179230Sjbsmp_rendezvous(void (* setup_func)(void *),
475179230Sjb	       void (* action_func)(void *),
476179230Sjb	       void (* teardown_func)(void *),
477179230Sjb	       void *arg)
478179230Sjb{
479179230Sjb	smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
480179230Sjb}
481179230Sjb
482176734Sjeffstatic struct cpu_group group[MAXCPU];
483176734Sjeff
484176734Sjeffstruct cpu_group *
485176734Sjeffsmp_topo(void)
486123125Sjhb{
487222813Sattilio	char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
488176734Sjeff	struct cpu_group *top;
489176734Sjeff
490176734Sjeff	/*
491176734Sjeff	 * Check for a fake topology request for debugging purposes.
492176734Sjeff	 */
493176734Sjeff	switch (smp_topology) {
494176734Sjeff	case 1:
495176734Sjeff		/* Dual core with no sharing.  */
496176734Sjeff		top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
497176734Sjeff		break;
498177007Sjeff	case 2:
499177007Sjeff		/* No topology, all cpus are equal. */
500177007Sjeff		top = smp_topo_none();
501177007Sjeff		break;
502176734Sjeff	case 3:
503176734Sjeff		/* Dual core with shared L2.  */
504176734Sjeff		top = smp_topo_1level(CG_SHARE_L2, 2, 0);
505176734Sjeff		break;
506176734Sjeff	case 4:
507176734Sjeff		/* quad core, shared l3 among each package, private l2.  */
508176734Sjeff		top = smp_topo_1level(CG_SHARE_L3, 4, 0);
509176734Sjeff		break;
510176734Sjeff	case 5:
511176734Sjeff		/* quad core,  2 dualcore parts on each package share l2.  */
512176734Sjeff		top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
513176734Sjeff		break;
514176734Sjeff	case 6:
515176734Sjeff		/* Single-core 2xHTT */
516176734Sjeff		top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
517176734Sjeff		break;
518176734Sjeff	case 7:
519176734Sjeff		/* quad core with a shared l3, 8 threads sharing L2.  */
520176734Sjeff		top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
521191643Sjeff		    CG_FLAG_SMT);
522176734Sjeff		break;
523176734Sjeff	default:
524176734Sjeff		/* Default, ask the system what it wants. */
525176734Sjeff		top = cpu_topo();
526176734Sjeff		break;
527176734Sjeff	}
528176734Sjeff	/*
529176734Sjeff	 * Verify the returned topology.
530176734Sjeff	 */
531176734Sjeff	if (top->cg_count != mp_ncpus)
532176734Sjeff		panic("Built bad topology at %p.  CPU count %d != %d",
533176734Sjeff		    top, top->cg_count, mp_ncpus);
534222813Sattilio	if (CPU_CMP(&top->cg_mask, &all_cpus))
535222813Sattilio		panic("Built bad topology at %p.  CPU mask (%s) != (%s)",
536222813Sattilio		    top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
537222813Sattilio		    cpusetobj_strprint(cpusetbuf2, &all_cpus));
538176734Sjeff	return (top);
539123125Sjhb}
540123125Sjhb
541176734Sjeffstruct cpu_group *
542176734Sjeffsmp_topo_none(void)
543176734Sjeff{
544176734Sjeff	struct cpu_group *top;
545176734Sjeff
546176734Sjeff	top = &group[0];
547176734Sjeff	top->cg_parent = NULL;
548176734Sjeff	top->cg_child = NULL;
549218584Sjmallett	top->cg_mask = all_cpus;
550176734Sjeff	top->cg_count = mp_ncpus;
551176734Sjeff	top->cg_children = 0;
552176734Sjeff	top->cg_level = CG_SHARE_NONE;
553176734Sjeff	top->cg_flags = 0;
554176734Sjeff
555176734Sjeff	return (top);
556176734Sjeff}
557176734Sjeff
558176734Sjeffstatic int
559176734Sjeffsmp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
560176734Sjeff    int count, int flags, int start)
561176734Sjeff{
562222813Sattilio	char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
563222813Sattilio	cpuset_t mask;
564176734Sjeff	int i;
565176734Sjeff
566222813Sattilio	CPU_ZERO(&mask);
567222813Sattilio	for (i = 0; i < count; i++, start++)
568222813Sattilio		CPU_SET(start, &mask);
569176734Sjeff	child->cg_parent = parent;
570176734Sjeff	child->cg_child = NULL;
571176734Sjeff	child->cg_children = 0;
572176734Sjeff	child->cg_level = share;
573176734Sjeff	child->cg_count = count;
574176734Sjeff	child->cg_flags = flags;
575176734Sjeff	child->cg_mask = mask;
576176734Sjeff	parent->cg_children++;
577176734Sjeff	for (; parent != NULL; parent = parent->cg_parent) {
578222813Sattilio		if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
579222813Sattilio			panic("Duplicate children in %p.  mask (%s) child (%s)",
580222813Sattilio			    parent,
581222813Sattilio			    cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
582222813Sattilio			    cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
583222813Sattilio		CPU_OR(&parent->cg_mask, &child->cg_mask);
584176734Sjeff		parent->cg_count += child->cg_count;
585176734Sjeff	}
586176734Sjeff
587176734Sjeff	return (start);
588176734Sjeff}
589176734Sjeff
590176734Sjeffstruct cpu_group *
591176734Sjeffsmp_topo_1level(int share, int count, int flags)
592176734Sjeff{
593176734Sjeff	struct cpu_group *child;
594176734Sjeff	struct cpu_group *top;
595176734Sjeff	int packages;
596176734Sjeff	int cpu;
597176734Sjeff	int i;
598176734Sjeff
599176734Sjeff	cpu = 0;
600176734Sjeff	top = &group[0];
601176734Sjeff	packages = mp_ncpus / count;
602176734Sjeff	top->cg_child = child = &group[1];
603176734Sjeff	top->cg_level = CG_SHARE_NONE;
604176734Sjeff	for (i = 0; i < packages; i++, child++)
605176734Sjeff		cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
606176734Sjeff	return (top);
607176734Sjeff}
608176734Sjeff
609176734Sjeffstruct cpu_group *
610176734Sjeffsmp_topo_2level(int l2share, int l2count, int l1share, int l1count,
611176734Sjeff    int l1flags)
612176734Sjeff{
613176734Sjeff	struct cpu_group *top;
614176734Sjeff	struct cpu_group *l1g;
615176734Sjeff	struct cpu_group *l2g;
616176734Sjeff	int cpu;
617176734Sjeff	int i;
618176734Sjeff	int j;
619176734Sjeff
620176734Sjeff	cpu = 0;
621176734Sjeff	top = &group[0];
622176734Sjeff	l2g = &group[1];
623176734Sjeff	top->cg_child = l2g;
624176734Sjeff	top->cg_level = CG_SHARE_NONE;
625176734Sjeff	top->cg_children = mp_ncpus / (l2count * l1count);
626176734Sjeff	l1g = l2g + top->cg_children;
627176734Sjeff	for (i = 0; i < top->cg_children; i++, l2g++) {
628176734Sjeff		l2g->cg_parent = top;
629176734Sjeff		l2g->cg_child = l1g;
630176734Sjeff		l2g->cg_level = l2share;
631176734Sjeff		for (j = 0; j < l2count; j++, l1g++)
632176734Sjeff			cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
633176734Sjeff			    l1flags, cpu);
634176734Sjeff	}
635176734Sjeff	return (top);
636176734Sjeff}
637176734Sjeff
638176734Sjeff
639176734Sjeffstruct cpu_group *
640176734Sjeffsmp_topo_find(struct cpu_group *top, int cpu)
641176734Sjeff{
642176734Sjeff	struct cpu_group *cg;
643222813Sattilio	cpuset_t mask;
644176734Sjeff	int children;
645176734Sjeff	int i;
646176734Sjeff
647222813Sattilio	CPU_SETOF(cpu, &mask);
648176734Sjeff	cg = top;
649176734Sjeff	for (;;) {
650222813Sattilio		if (!CPU_OVERLAP(&cg->cg_mask, &mask))
651176734Sjeff			return (NULL);
652176734Sjeff		if (cg->cg_children == 0)
653176734Sjeff			return (cg);
654176734Sjeff		children = cg->cg_children;
655176734Sjeff		for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
656222813Sattilio			if (CPU_OVERLAP(&cg->cg_mask, &mask))
657176734Sjeff				break;
658176734Sjeff	}
659176734Sjeff	return (NULL);
660176734Sjeff}
661176734Sjeff#else /* !SMP */
662176734Sjeff
663123125Sjhbvoid
664222813Sattiliosmp_rendezvous_cpus(cpuset_t map,
665179230Sjb	void (*setup_func)(void *),
666179230Sjb	void (*action_func)(void *),
667179230Sjb	void (*teardown_func)(void *),
668179230Sjb	void *arg)
669179230Sjb{
670227888Sattilio	/*
671227888Sattilio	 * In the !SMP case we just need to ensure the same initial conditions
672227888Sattilio	 * as the SMP case.
673227888Sattilio	 */
674227888Sattilio	spinlock_enter();
675179230Sjb	if (setup_func != NULL)
676179230Sjb		setup_func(arg);
677179230Sjb	if (action_func != NULL)
678179230Sjb		action_func(arg);
679179230Sjb	if (teardown_func != NULL)
680179230Sjb		teardown_func(arg);
681227888Sattilio	spinlock_exit();
682179230Sjb}
683179230Sjb
684179230Sjbvoid
685175057Sjhbsmp_rendezvous(void (*setup_func)(void *),
686175057Sjhb	       void (*action_func)(void *),
687175057Sjhb	       void (*teardown_func)(void *),
688123125Sjhb	       void *arg)
689123125Sjhb{
690123125Sjhb
691227888Sattilio	/* Look comments in the smp_rendezvous_cpus() case. */
692227888Sattilio	spinlock_enter();
693123125Sjhb	if (setup_func != NULL)
694123125Sjhb		setup_func(arg);
695123125Sjhb	if (action_func != NULL)
696123125Sjhb		action_func(arg);
697123125Sjhb	if (teardown_func != NULL)
698123125Sjhb		teardown_func(arg);
699227888Sattilio	spinlock_exit();
700123125Sjhb}
701176734Sjeff
702176734Sjeff/*
703176734Sjeff * Provide dummy SMP support for UP kernels.  Modules that need to use SMP
704176734Sjeff * APIs will still work using this dummy support.
705176734Sjeff */
706176734Sjeffstatic void
707176734Sjeffmp_setvariables_for_up(void *dummy)
708176734Sjeff{
709176734Sjeff	mp_ncpus = 1;
710176734Sjeff	mp_maxid = PCPU_GET(cpuid);
711223758Sattilio	CPU_SETOF(mp_maxid, &all_cpus);
712176734Sjeff	KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero"));
713176734Sjeff}
714176734SjeffSYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST,
715177253Srwatson    mp_setvariables_for_up, NULL);
716123125Sjhb#endif /* SMP */
717179230Sjb
/*
 * Marker function for the rendezvous API.  Its body does no work; in
 * SMP kernels it only asserts that SMP has not been started yet.  The
 * 'dummy' argument is unused.
 */
void
smp_no_rendevous_barrier(void *dummy)
{
#ifdef SMP
	KASSERT(!smp_started,
	    ("smp_no_rendevous called and smp is started"));
#endif
}
725