/*-
 * Copyright (c) 2001, John Baldwin <jhb@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module holds the global variables and machine independent functions
 * used for the kernel SMP support.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/10/sys/kern/subr_smp.c 331910 2018-04-03 07:52:06Z avg $");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/proc.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
#include <machine/smp.h>

#include "opt_sched.h"

#ifdef SMP
volatile cpuset_t stopped_cpus;
volatile cpuset_t started_cpus;
volatile cpuset_t suspended_cpus;
cpuset_t hlt_cpus_mask;
cpuset_t logical_cpus_mask;

void (*cpustop_restartfunc)(void);
#endif

static int sysctl_kern_smp_active(SYSCTL_HANDLER_ARGS);

/* This is used in modules that need to work in both SMP and UP. */
cpuset_t all_cpus;
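
/*
 * Illustrative sketch (not compiled in): the usual way MI code and modules
 * walk the CPUs described by all_cpus, assuming a hypothetical per-CPU
 * counter array.  CPU_FOREACH() iterates CPU IDs 0..mp_maxid and skips IDs
 * that are absent from all_cpus:
 *
 *	static u_long example_counts[MAXCPU];
 *	u_long total = 0;
 *	u_int cpu;
 *
 *	CPU_FOREACH(cpu)
 *		total += example_counts[cpu];
 *
 * On UP kernels all_cpus contains only CPU 0 (see mp_setvariables_for_up()
 * below), so the same code works unchanged.
 */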

int mp_ncpus;
/* export this for libkvm consumers. */
int mp_maxcpus = MAXCPU;

volatile int smp_started;
u_int mp_maxid;

static SYSCTL_NODE(_kern, OID_AUTO, smp, CTLFLAG_RD|CTLFLAG_CAPRD, NULL,
    "Kernel SMP");

SYSCTL_INT(_kern_smp, OID_AUTO, maxid, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxid, 0,
    "Max CPU ID.");

SYSCTL_INT(_kern_smp, OID_AUTO, maxcpus, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_maxcpus,
    0, "Max number of CPUs that the system was compiled for.");

SYSCTL_PROC(_kern_smp, OID_AUTO, active, CTLFLAG_RD | CTLTYPE_INT, NULL, 0,
    sysctl_kern_smp_active, "I", "Indicates system is running in SMP mode");

int smp_disabled = 0;	/* has smp been disabled? */
SYSCTL_INT(_kern_smp, OID_AUTO, disabled, CTLFLAG_RDTUN|CTLFLAG_CAPRD,
    &smp_disabled, 0, "SMP has been disabled from the loader");
TUNABLE_INT("kern.smp.disabled", &smp_disabled);

int smp_cpus = 1;	/* how many CPUs are running */
SYSCTL_INT(_kern_smp, OID_AUTO, cpus, CTLFLAG_RD|CTLFLAG_CAPRD, &smp_cpus, 0,
    "Number of CPUs online");

int smp_topology = 0;	/* Which topology we're using. */
SYSCTL_INT(_kern_smp, OID_AUTO, topology, CTLFLAG_RD, &smp_topology, 0,
    "Topology override setting; 0 is default provided by hardware.");
TUNABLE_INT("kern.smp.topology", &smp_topology);

#ifdef SMP
/* Enable forwarding of a signal to a process running on a different CPU */
static int forward_signal_enabled = 1;
SYSCTL_INT(_kern_smp, OID_AUTO, forward_signal_enabled, CTLFLAG_RW,
    &forward_signal_enabled, 0,
    "Forwarding of a signal to a process on a different CPU");

/* Variables needed for SMP rendezvous. */
static volatile int smp_rv_ncpus;
static void (*volatile smp_rv_setup_func)(void *arg);
static void (*volatile smp_rv_action_func)(void *arg);
static void (*volatile smp_rv_teardown_func)(void *arg);
static void *volatile smp_rv_func_arg;
static volatile int smp_rv_waiters[4];

/*
 * Shared mutex to restrict busywaits between smp_rendezvous() and
 * smp(_targeted)_tlb_shootdown().  A deadlock occurs if both of these
 * functions trigger at once and cause multiple CPUs to busywait with
 * interrupts disabled.
 */
struct mtx smp_ipi_mtx;

/*
 * Let the MD SMP code initialize mp_maxid very early if it can.
 */
static void
mp_setmaxid(void *dummy)
{
	cpu_mp_setmaxid();
}
SYSINIT(cpu_mp_setmaxid, SI_SUB_TUNABLES, SI_ORDER_FIRST, mp_setmaxid, NULL);
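
/*
 * Because mp_maxid is set at SI_SUB_TUNABLES, later SYSINITs can rely on it
 * to size per-CPU state.  A minimal sketch (hypothetical helper, not part of
 * this file), assuming <sys/malloc.h> and the M_TEMP malloc type:
 *
 *	static u_long *example_pcnt;
 *
 *	static void
 *	example_alloc(void *dummy)
 *	{
 *		example_pcnt = malloc((mp_maxid + 1) * sizeof(*example_pcnt),
 *		    M_TEMP, M_WAITOK | M_ZERO);
 *	}
 *	SYSINIT(example_pcnt, SI_SUB_CPU, SI_ORDER_ANY, example_alloc, NULL);
 *
 * CPU IDs need not be dense, so such arrays should only be indexed for IDs
 * that pass !CPU_ABSENT() (or via CPU_FOREACH()).
 */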

/*
 * Call the MD SMP initialization code.
 */
static void
mp_start(void *dummy)
{

	mtx_init(&smp_ipi_mtx, "smp rendezvous", NULL, MTX_SPIN);

	/* Probe for MP hardware. */
	if (smp_disabled != 0 || cpu_mp_probe() == 0) {
		mp_ncpus = 1;
		CPU_SETOF(PCPU_GET(cpuid), &all_cpus);
		return;
	}

	cpu_mp_start();
	printf("FreeBSD/SMP: Multiprocessor System Detected: %d CPUs\n",
	    mp_ncpus);
	cpu_mp_announce();
}
SYSINIT(cpu_mp, SI_SUB_CPU, SI_ORDER_THIRD, mp_start, NULL);

void
forward_signal(struct thread *td)
{
	int id;

	/*
	 * signotify() has already set TDF_ASTPENDING and TDF_NEEDSIGCHECK on
	 * this thread, so all we need to do is poke it if it is currently
	 * executing so that it executes ast().
	 */
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_RUNNING(td),
	    ("forward_signal: thread is not TDS_RUNNING"));

	CTR1(KTR_SMP, "forward_signal(%p)", td->td_proc);

	if (!smp_started || cold || panicstr)
		return;
	if (!forward_signal_enabled)
		return;

	/* No need to IPI ourself. */
	if (td == curthread)
		return;

	id = td->td_oncpu;
	if (id == NOCPU)
		return;
	ipi_cpu(id, IPI_AST);
}

/*
 * When called, the executing CPU will send an IPI to all other CPUs
 * requesting that they halt execution.
 *
 * Usually (but not necessarily) called with 'other_cpus' as its arg.
 *
 * - Signals all CPUs in map to stop.
 * - Waits for each to stop.
 *
 * Returns:
 *  -1: error
 *   0: NA
 *   1: ok
 *
 */
static int
generic_stop_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
	char cpusetbuf[CPUSETBUFSIZ];
#endif
	static volatile u_int stopping_cpu = NOCPU;
	int i;
	volatile cpuset_t *cpus;

	KASSERT(
#if defined(__amd64__) || defined(__i386__)
	    type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
#else
	    type == IPI_STOP || type == IPI_STOP_HARD,
#endif
	    ("%s: invalid stop type", __func__));

	if (!smp_started)
		return (0);

	CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
	    cpusetobj_strprint(cpusetbuf, &map), type);

#if defined(__amd64__) || defined(__i386__)
	/*
	 * When suspending, ensure there are no IPIs in progress.
	 * IPIs that have been issued, but not yet delivered (e.g.
	 * not pending on a vCPU when running under virtualization)
	 * will be lost, violating FreeBSD's assumption of reliable
	 * IPI delivery.
	 */
	if (type == IPI_SUSPEND)
		mtx_lock_spin(&smp_ipi_mtx);
#endif

	if (stopping_cpu != PCPU_GET(cpuid))
		while (atomic_cmpset_int(&stopping_cpu, NOCPU,
		    PCPU_GET(cpuid)) == 0)
			while (stopping_cpu != NOCPU)
				cpu_spinwait(); /* spin */

	/* send the stop IPI to all CPUs in map */
	ipi_selected(map, type);

#if defined(__amd64__) || defined(__i386__)
	if (type == IPI_SUSPEND)
		cpus = &suspended_cpus;
	else
#endif
		cpus = &stopped_cpus;

	i = 0;
	while (!CPU_SUBSET(cpus, &map)) {
		/* spin */
		cpu_spinwait();
		i++;
		if (i == 100000000) {
			printf("timeout stopping cpus\n");
			break;
		}
	}

#if defined(__amd64__) || defined(__i386__)
	if (type == IPI_SUSPEND)
		mtx_unlock_spin(&smp_ipi_mtx);
#endif

	stopping_cpu = NOCPU;
	return (1);
}

int
stop_cpus(cpuset_t map)
{

	return (generic_stop_cpus(map, IPI_STOP));
}

int
stop_cpus_hard(cpuset_t map)
{

	return (generic_stop_cpus(map, IPI_STOP_HARD));
}

#if defined(__amd64__) || defined(__i386__)
int
suspend_cpus(cpuset_t map)
{

	return (generic_stop_cpus(map, IPI_SUSPEND));
}
#endif
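
/*
 * Illustrative sketch (not compiled in): the usual pairing of the stop and
 * restart primitives, pausing every CPU except the caller around a critical
 * operation:
 *
 *	cpuset_t other_cpus;
 *
 *	other_cpus = all_cpus;
 *	CPU_CLR(PCPU_GET(cpuid), &other_cpus);
 *	stop_cpus(other_cpus);
 *	(operate while the other CPUs spin in the MD IPI_STOP handler)
 *	restart_cpus(stopped_cpus);
 *
 * stop_cpus_hard() follows the same pattern but uses IPI_STOP_HARD, which
 * the MD code may deliver as an NMI so that CPUs spinning with interrupts
 * disabled are still caught; suspend_cpus()/resume_cpus() are the analogous
 * pair used on x86 for ACPI suspend and resume.
 */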

/*
 * Called by a CPU to restart stopped CPUs.
 *
 * Usually (but not necessarily) called with 'stopped_cpus' as its arg.
 *
 * - Signals all CPUs in map to restart.
 * - Waits for each to restart.
 *
 * Returns:
 *  -1: error
 *   0: NA
 *   1: ok
 */
static int
generic_restart_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
	char cpusetbuf[CPUSETBUFSIZ];
#endif
	volatile cpuset_t *cpus;

	KASSERT(
#if defined(__amd64__) || defined(__i386__)
	    type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
#else
	    type == IPI_STOP || type == IPI_STOP_HARD,
#endif
	    ("%s: invalid stop type", __func__));

	if (!smp_started)
		return 0;

	CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));

#if defined(__amd64__) || defined(__i386__)
	if (type == IPI_SUSPEND)
		cpus = &resuming_cpus;
	else
#endif
		cpus = &stopped_cpus;

	/* signal other cpus to restart */
#if defined(__amd64__) || defined(__i386__)
	if (type == IPI_SUSPEND)
		CPU_COPY_STORE_REL(&map, &toresume_cpus);
	else
#endif
		CPU_COPY_STORE_REL(&map, &started_cpus);

	/* wait for each to clear its bit */
	while (CPU_OVERLAP(cpus, &map))
		cpu_spinwait();

	return 1;
}

int
restart_cpus(cpuset_t map)
{

	return (generic_restart_cpus(map, IPI_STOP));
}

#if defined(__amd64__) || defined(__i386__)
int
resume_cpus(cpuset_t map)
{

	return (generic_restart_cpus(map, IPI_SUSPEND));
}
#endif

/*
 * All-CPU rendezvous.  CPUs are signalled, all execute the setup function
 * (if specified), rendezvous, execute the action function (if specified),
 * rendezvous again, execute the teardown function (if specified), and then
 * resume.
 *
 * Note that the supplied external functions _must_ be reentrant and aware
 * that they are running in parallel and in an unknown lock context.
 */
void
smp_rendezvous_action(void)
{
	struct thread *td;
	void *local_func_arg;
	void (*local_setup_func)(void*);
	void (*local_action_func)(void*);
	void (*local_teardown_func)(void*);
#ifdef INVARIANTS
	int owepreempt;
#endif

	/* Ensure we have up-to-date values. */
	atomic_add_acq_int(&smp_rv_waiters[0], 1);
	while (smp_rv_waiters[0] < smp_rv_ncpus)
		cpu_spinwait();

	/* Fetch rendezvous parameters after acquire barrier. */
	local_func_arg = smp_rv_func_arg;
	local_setup_func = smp_rv_setup_func;
	local_action_func = smp_rv_action_func;
	local_teardown_func = smp_rv_teardown_func;

	/*
	 * Use a nested critical section to prevent any preemptions
	 * from occurring during a rendezvous action routine.
	 * Specifically, if a rendezvous handler is invoked via an IPI
	 * and the interrupted thread was in the critical_exit()
	 * function after setting td_critnest to 0 but before
	 * performing a deferred preemption, this routine can be
	 * invoked with td_critnest set to 0 and td_owepreempt true.
	 * In that case, a critical_exit() during the rendezvous
	 * action would trigger a preemption which is not permitted in
	 * a rendezvous action.  To fix this, wrap all of the
	 * rendezvous action handlers in a critical section.  We
	 * cannot use a regular critical section however as having
	 * critical_exit() preempt from this routine would also be
	 * problematic (the preemption must not occur before the IPI
	 * has been acknowledged via an EOI).  Instead, we
	 * intentionally ignore td_owepreempt when leaving the
	 * critical section.  This should be harmless because we do
	 * not permit rendezvous action routines to schedule threads,
	 * and thus td_owepreempt should never transition from 0 to 1
	 * during this routine.
	 */
	td = curthread;
	td->td_critnest++;
#ifdef INVARIANTS
	owepreempt = td->td_owepreempt;
#endif

	/*
	 * If requested, run a setup function before the main action
	 * function.  Ensure all CPUs have completed the setup
	 * function before moving on to the action function.
	 */
	if (local_setup_func != smp_no_rendevous_barrier) {
		if (smp_rv_setup_func != NULL)
			smp_rv_setup_func(smp_rv_func_arg);
		atomic_add_int(&smp_rv_waiters[1], 1);
		while (smp_rv_waiters[1] < smp_rv_ncpus)
			cpu_spinwait();
	}

	if (local_action_func != NULL)
		local_action_func(local_func_arg);

	if (local_teardown_func != smp_no_rendevous_barrier) {
		/*
		 * Signal that the main action has been completed.  If a
		 * full exit rendezvous is requested, then all CPUs will
		 * wait here until all CPUs have finished the main action.
		 */
		atomic_add_int(&smp_rv_waiters[2], 1);
		while (smp_rv_waiters[2] < smp_rv_ncpus)
			cpu_spinwait();

		if (local_teardown_func != NULL)
			local_teardown_func(local_func_arg);
	}

	/*
	 * Signal that the rendezvous is fully completed by this CPU.
	 * This means that no member of smp_rv_* pseudo-structure will be
	 * accessed by this target CPU after this point; in particular,
	 * memory pointed by smp_rv_func_arg.
	 */
	atomic_add_int(&smp_rv_waiters[3], 1);

	td->td_critnest--;
	KASSERT(owepreempt == td->td_owepreempt,
	    ("rendezvous action changed td_owepreempt"));
}

void
smp_rendezvous_cpus(cpuset_t map,
	void (* setup_func)(void *),
	void (* action_func)(void *),
	void (* teardown_func)(void *),
	void *arg)
{
	int curcpumap, i, ncpus = 0;

	/* See the comments in the !SMP case. */
	if (!smp_started) {
		spinlock_enter();
		if (setup_func != NULL)
			setup_func(arg);
		if (action_func != NULL)
			action_func(arg);
		if (teardown_func != NULL)
			teardown_func(arg);
		spinlock_exit();
		return;
	}

	CPU_FOREACH(i) {
		if (CPU_ISSET(i, &map))
			ncpus++;
	}
	if (ncpus == 0)
		panic("ncpus is 0 with non-zero map");

	mtx_lock_spin(&smp_ipi_mtx);

	/* Pass rendezvous parameters via global variables. */
	smp_rv_ncpus = ncpus;
	smp_rv_setup_func = setup_func;
	smp_rv_action_func = action_func;
	smp_rv_teardown_func = teardown_func;
	smp_rv_func_arg = arg;
	smp_rv_waiters[1] = 0;
	smp_rv_waiters[2] = 0;
	smp_rv_waiters[3] = 0;
	atomic_store_rel_int(&smp_rv_waiters[0], 0);

	/*
	 * Signal other processors, which will enter the IPI with
	 * interrupts off.
	 */
	curcpumap = CPU_ISSET(curcpu, &map);
	CPU_CLR(curcpu, &map);
	ipi_selected(map, IPI_RENDEZVOUS);

	/* Check if the current CPU is in the map */
	if (curcpumap != 0)
		smp_rendezvous_action();

	/*
	 * Ensure that the master CPU waits for all the other
	 * CPUs to finish the rendezvous, so that smp_rv_*
	 * pseudo-structure and the arg are guaranteed to not
	 * be in use.
	 */
	while (atomic_load_acq_int(&smp_rv_waiters[3]) < ncpus)
		cpu_spinwait();

	mtx_unlock_spin(&smp_ipi_mtx);
}

void
smp_rendezvous(void (* setup_func)(void *),
	void (* action_func)(void *),
	void (* teardown_func)(void *),
	void *arg)
{
	smp_rendezvous_cpus(all_cpus, setup_func, action_func, teardown_func, arg);
}
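
/*
 * Illustrative sketch (not compiled in): running an action on every CPU in
 * IPI context, assuming a hypothetical counter.  Passing
 * smp_no_rendevous_barrier as the setup or teardown argument skips that
 * barrier entirely, while NULL keeps the barrier but runs no function:
 *
 *	static void
 *	example_action(void *arg)
 *	{
 *		atomic_add_int((volatile u_int *)arg, 1);
 *	}
 *
 *	u_int visited = 0;
 *
 *	smp_rendezvous(smp_no_rendevous_barrier, example_action,
 *	    smp_no_rendevous_barrier, &visited);
 *
 * The action runs with interrupts disabled on every CPU (or directly on the
 * caller before smp_started), so it must not sleep or acquire non-spin
 * locks; see the reentrancy note above smp_rendezvous_action().
 */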

static struct cpu_group group[MAXCPU];

struct cpu_group *
smp_topo(void)
{
	char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
	struct cpu_group *top;

	/*
	 * Check for a fake topology request for debugging purposes.
	 */
	switch (smp_topology) {
	case 1:
		/* Dual core with no sharing.  */
		top = smp_topo_1level(CG_SHARE_NONE, 2, 0);
		break;
	case 2:
		/* No topology, all cpus are equal. */
		top = smp_topo_none();
		break;
	case 3:
		/* Dual core with shared L2.  */
		top = smp_topo_1level(CG_SHARE_L2, 2, 0);
		break;
	case 4:
		/* quad core, shared l3 among each package, private l2.  */
		top = smp_topo_1level(CG_SHARE_L3, 4, 0);
		break;
	case 5:
		/* quad core, 2 dualcore parts on each package share l2.  */
		top = smp_topo_2level(CG_SHARE_NONE, 2, CG_SHARE_L2, 2, 0);
		break;
	case 6:
		/* Single-core 2xHTT */
		top = smp_topo_1level(CG_SHARE_L1, 2, CG_FLAG_HTT);
		break;
	case 7:
		/* quad core with a shared l3, 8 threads sharing L2.  */
		top = smp_topo_2level(CG_SHARE_L3, 4, CG_SHARE_L2, 8,
		    CG_FLAG_SMT);
		break;
	default:
		/* Default, ask the system what it wants. */
		top = cpu_topo();
		break;
	}
	/*
	 * Verify the returned topology.
	 */
	if (top->cg_count != mp_ncpus)
		panic("Built bad topology at %p.  CPU count %d != %d",
		    top, top->cg_count, mp_ncpus);
	if (CPU_CMP(&top->cg_mask, &all_cpus))
		panic("Built bad topology at %p.  CPU mask (%s) != (%s)",
		    top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
		    cpusetobj_strprint(cpusetbuf2, &all_cpus));
	return (top);
}

struct cpu_group *
smp_topo_none(void)
{
	struct cpu_group *top;

	top = &group[0];
	top->cg_parent = NULL;
	top->cg_child = NULL;
	top->cg_mask = all_cpus;
	top->cg_count = mp_ncpus;
	top->cg_children = 0;
	top->cg_level = CG_SHARE_NONE;
	top->cg_flags = 0;

	return (top);
}

static int
smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
    int count, int flags, int start)
{
	char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
	cpuset_t mask;
	int i;

	CPU_ZERO(&mask);
	for (i = 0; i < count; i++, start++)
		CPU_SET(start, &mask);
	child->cg_parent = parent;
	child->cg_child = NULL;
	child->cg_children = 0;
	child->cg_level = share;
	child->cg_count = count;
	child->cg_flags = flags;
	child->cg_mask = mask;
	parent->cg_children++;
	for (; parent != NULL; parent = parent->cg_parent) {
		if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
			panic("Duplicate children in %p.  mask (%s) child (%s)",
			    parent,
			    cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
			    cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
		CPU_OR(&parent->cg_mask, &child->cg_mask);
		parent->cg_count += child->cg_count;
	}

	return (start);
}

struct cpu_group *
smp_topo_1level(int share, int count, int flags)
{
	struct cpu_group *child;
	struct cpu_group *top;
	int packages;
	int cpu;
	int i;

	cpu = 0;
	top = &group[0];
	packages = mp_ncpus / count;
	top->cg_child = child = &group[1];
	top->cg_level = CG_SHARE_NONE;
	for (i = 0; i < packages; i++, child++)
		cpu = smp_topo_addleaf(top, child, share, count, flags, cpu);
	return (top);
}

struct cpu_group *
smp_topo_2level(int l2share, int l2count, int l1share, int l1count,
    int l1flags)
{
	struct cpu_group *top;
	struct cpu_group *l1g;
	struct cpu_group *l2g;
	int cpu;
	int i;
	int j;

	cpu = 0;
	top = &group[0];
	l2g = &group[1];
	top->cg_child = l2g;
	top->cg_level = CG_SHARE_NONE;
	top->cg_children = mp_ncpus / (l2count * l1count);
	l1g = l2g + top->cg_children;
	for (i = 0; i < top->cg_children; i++, l2g++) {
		l2g->cg_parent = top;
		l2g->cg_child = l1g;
		l2g->cg_level = l2share;
		for (j = 0; j < l2count; j++, l1g++)
			cpu = smp_topo_addleaf(l2g, l1g, l1share, l1count,
			    l1flags, cpu);
	}
	return (top);
}

struct cpu_group *
smp_topo_find(struct cpu_group *top, int cpu)
{
	struct cpu_group *cg;
	cpuset_t mask;
	int children;
	int i;

	CPU_SETOF(cpu, &mask);
	cg = top;
	for (;;) {
		if (!CPU_OVERLAP(&cg->cg_mask, &mask))
			return (NULL);
		if (cg->cg_children == 0)
			return (cg);
		children = cg->cg_children;
		for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
			if (CPU_OVERLAP(&cg->cg_mask, &mask))
				break;
	}
	return (NULL);
}
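
/*
 * Worked example (illustrative only): on a hypothetical 8-CPU machine,
 *
 *	top = smp_topo_2level(CG_SHARE_L3, 2, CG_SHARE_L1, 2, CG_FLAG_SMT);
 *
 * builds a three-level tree: a root covering all 8 CPUs (CG_SHARE_NONE),
 * two CG_SHARE_L3 groups of 4 CPUs each, and under each of those two
 * CG_SHARE_L1 leaves of 2 SMT threads.  smp_topo_find(top, 5) walks this
 * tree and returns the leaf whose cg_mask contains CPU 5.  A scheduler
 * such as ULE consumes the tree when making affinity decisions.
 */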
739227058Sattilio */ 740227058Sattilio spinlock_enter(); 741179230Sjb if (setup_func != NULL) 742179230Sjb setup_func(arg); 743179230Sjb if (action_func != NULL) 744179230Sjb action_func(arg); 745179230Sjb if (teardown_func != NULL) 746179230Sjb teardown_func(arg); 747227058Sattilio spinlock_exit(); 748179230Sjb} 749179230Sjb 750179230Sjbvoid 751175057Sjhbsmp_rendezvous(void (*setup_func)(void *), 752175057Sjhb void (*action_func)(void *), 753175057Sjhb void (*teardown_func)(void *), 754123125Sjhb void *arg) 755123125Sjhb{ 756123125Sjhb 757227058Sattilio /* Look comments in the smp_rendezvous_cpus() case. */ 758227058Sattilio spinlock_enter(); 759123125Sjhb if (setup_func != NULL) 760123125Sjhb setup_func(arg); 761123125Sjhb if (action_func != NULL) 762123125Sjhb action_func(arg); 763123125Sjhb if (teardown_func != NULL) 764123125Sjhb teardown_func(arg); 765227058Sattilio spinlock_exit(); 766123125Sjhb} 767176734Sjeff 768176734Sjeff/* 769176734Sjeff * Provide dummy SMP support for UP kernels. Modules that need to use SMP 770176734Sjeff * APIs will still work using this dummy support. 771176734Sjeff */ 772176734Sjeffstatic void 773176734Sjeffmp_setvariables_for_up(void *dummy) 774176734Sjeff{ 775176734Sjeff mp_ncpus = 1; 776176734Sjeff mp_maxid = PCPU_GET(cpuid); 777223758Sattilio CPU_SETOF(mp_maxid, &all_cpus); 778176734Sjeff KASSERT(PCPU_GET(cpuid) == 0, ("UP must have a CPU ID of zero")); 779176734Sjeff} 780176734SjeffSYSINIT(cpu_mp_setvariables, SI_SUB_TUNABLES, SI_ORDER_FIRST, 781177253Srwatson mp_setvariables_for_up, NULL); 782123125Sjhb#endif /* SMP */ 783179230Sjb 784179230Sjbvoid 785179230Sjbsmp_no_rendevous_barrier(void *dummy) 786179230Sjb{ 787179230Sjb#ifdef SMP 788179230Sjb KASSERT((!smp_started),("smp_no_rendevous called and smp is started")); 789179230Sjb#endif 790179230Sjb} 791243046Sjeff 792243046Sjeff/* 793243046Sjeff * Wait specified idle threads to switch once. This ensures that even 794243046Sjeff * preempted threads have cycled through the switch function once, 795243046Sjeff * exiting their codepaths. This allows us to change global pointers 796243046Sjeff * with no other synchronization. 
797243046Sjeff */ 798243046Sjeffint 799243046Sjeffquiesce_cpus(cpuset_t map, const char *wmesg, int prio) 800243046Sjeff{ 801243046Sjeff struct pcpu *pcpu; 802243046Sjeff u_int gen[MAXCPU]; 803243046Sjeff int error; 804243046Sjeff int cpu; 805243046Sjeff 806243046Sjeff error = 0; 807243046Sjeff for (cpu = 0; cpu <= mp_maxid; cpu++) { 808243046Sjeff if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) 809243046Sjeff continue; 810243046Sjeff pcpu = pcpu_find(cpu); 811243046Sjeff gen[cpu] = pcpu->pc_idlethread->td_generation; 812243046Sjeff } 813243046Sjeff for (cpu = 0; cpu <= mp_maxid; cpu++) { 814243046Sjeff if (!CPU_ISSET(cpu, &map) || CPU_ABSENT(cpu)) 815243046Sjeff continue; 816243046Sjeff pcpu = pcpu_find(cpu); 817243046Sjeff thread_lock(curthread); 818243046Sjeff sched_bind(curthread, cpu); 819243046Sjeff thread_unlock(curthread); 820243046Sjeff while (gen[cpu] == pcpu->pc_idlethread->td_generation) { 821243046Sjeff error = tsleep(quiesce_cpus, prio, wmesg, 1); 822244444Sjeff if (error != EWOULDBLOCK) 823243046Sjeff goto out; 824244444Sjeff error = 0; 825243046Sjeff } 826243046Sjeff } 827243046Sjeffout: 828243046Sjeff thread_lock(curthread); 829243046Sjeff sched_unbind(curthread); 830243046Sjeff thread_unlock(curthread); 831243046Sjeff 832243046Sjeff return (error); 833243046Sjeff} 834243046Sjeff 835243046Sjeffint 836243046Sjeffquiesce_all_cpus(const char *wmesg, int prio) 837243046Sjeff{ 838243046Sjeff 839243046Sjeff return quiesce_cpus(all_cpus, wmesg, prio); 840243046Sjeff} 841265606Sscottl 842265606Sscottl/* Extra care is taken with this sysctl because the data type is volatile */ 843265606Sscottlstatic int 844265606Sscottlsysctl_kern_smp_active(SYSCTL_HANDLER_ARGS) 845265606Sscottl{ 846265606Sscottl int error, active; 847265606Sscottl 848265606Sscottl active = smp_started; 849265606Sscottl error = SYSCTL_OUT(req, &active, sizeof(active)); 850265606Sscottl return (error); 851265606Sscottl} 852265606Sscottl 853