/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.org> wrote this file. As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD: stable/11/sys/sys/smp.h 331909 2018-04-03 07:31:22Z avg $
 */

#ifndef _SYS_SMP_H_
#define _SYS_SMP_H_

#ifdef _KERNEL

#ifndef LOCORE

#include <sys/cpuset.h>
#include <sys/queue.h>

/*
 * Types of nodes in the topological tree.
 */
typedef enum {
	/* No node has this type; can be used in topo API calls. */
	TOPO_TYPE_DUMMY,
	/* Processing unit aka computing unit aka logical CPU. */
	TOPO_TYPE_PU,
	/* Physical subdivision of a package. */
	TOPO_TYPE_CORE,
	/* CPU L1/L2/L3 cache. */
	TOPO_TYPE_CACHE,
	/* Package aka chip, equivalent to socket. */
	TOPO_TYPE_PKG,
	/* NUMA node. */
	TOPO_TYPE_NODE,
	/* Other logical or physical grouping of PUs. */
	/* E.g. PUs on the same die, or PUs sharing an FPU. */
	TOPO_TYPE_GROUP,
	/* The whole system. */
	TOPO_TYPE_SYSTEM
} topo_node_type;

/* Hardware identifier of a topology component. */
typedef unsigned int hwid_t;
/* Logical CPU identifier. */
typedef int cpuid_t;

/* A node in the topology. */
struct topo_node {
	struct topo_node			*parent;
	TAILQ_HEAD(topo_children, topo_node)	children;
	TAILQ_ENTRY(topo_node)			siblings;
	cpuset_t				cpuset;
	topo_node_type				type;
	uintptr_t				subtype;
	hwid_t					hwid;
	cpuid_t					id;
	int					nchildren;
	int					cpu_count;
};

/*
 * Scheduling topology of a NUMA or SMP system.
 *
 * The top level topology is an array of pointers to groups. Each group
 * contains a bitmask of cpus in its group or subgroups. It may also
 * contain a pointer to an array of child groups.
 *
 * The bitmasks at non leaf groups may be used by consumers who support
 * a smaller depth than the hardware provides.
 *
 * The topology may be omitted by systems where all CPUs are equal.
 */

struct cpu_group {
	struct cpu_group *cg_parent;	/* Our parent group. */
	struct cpu_group *cg_child;	/* Optional children groups. */
	cpuset_t	cg_mask;	/* Mask of cpus in this group. */
	int32_t		cg_count;	/* Count of cpus in this group. */
	int16_t		cg_children;	/* Number of children groups. */
	int8_t		cg_level;	/* Shared cache level. */
	int8_t		cg_flags;	/* Traversal modifiers. */
};

typedef struct cpu_group *cpu_group_t;

/*
 * Defines common resources for CPUs in the group. The highest level
 * resource should be used when multiple are shared.
 */
#define	CG_SHARE_NONE	0
#define	CG_SHARE_L1	1
#define	CG_SHARE_L2	2
#define	CG_SHARE_L3	3

#define	MAX_CACHE_LEVELS	CG_SHARE_L3

/*
 * Behavior modifiers for load balancing and affinity.
 */
#define	CG_FLAG_HTT	0x01		/* Schedule the alternate core last. */
#define	CG_FLAG_SMT	0x02		/* New age htt, less crippled. */
#define	CG_FLAG_THREAD	(CG_FLAG_HTT | CG_FLAG_SMT)	/* Any threading. */

/*
 * Convenience routines for building and traversing topologies.
 */
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
    struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
    struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);
int topo_analyze(struct topo_node *topo_root, int all, int *pkg_count,
    int *cores_per_pkg, int *thrs_per_core);

#define	TOPO_FOREACH(i, root)	\
	for (i = root; i != NULL; i = topo_next_node(root, i))

struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
    int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);

extern void (*cpustop_restartfunc)(void);
extern int smp_cpus;
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus;	/* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus;	/* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus;	/* cpus in stop pen */
extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus;	/* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask;		/* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */

extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncpus;
extern volatile int smp_started;

extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM];	/* CPUs in each NUMA domain. */

/*
 * Macro allowing us to determine whether a CPU is absent at any given
 * time, thus permitting us to configure sparse maps of cpuid-dependent
 * (per-CPU) structures.
 */
#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))

/*
 * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
 * integer iterator and iterates over the available set of CPUs.
 * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
 * returns the id of the next non-absent CPU.  It will wrap back to
 * CPU_FIRST() once the end of the list is reached.  The iterators are
 * currently implemented via inline functions.
 */
#define	CPU_FOREACH(i)							\
	for ((i) = 0; (i) <= mp_maxid; (i)++)				\
		if (!CPU_ABSENT((i)))

static __inline int
cpu_first(void)
{
	int i;

	for (i = 0;; i++)
		if (!CPU_ABSENT(i))
			return (i);
}

static __inline int
cpu_next(int i)
{

	for (;;) {
		i++;
		if (i > mp_maxid)
			i = 0;
		if (!CPU_ABSENT(i))
			return (i);
	}
}

#define	CPU_FIRST()	cpu_first()
#define	CPU_NEXT(i)	cpu_next((i))

#ifdef SMP
/*
 * Machine dependent functions used to initialize MP support.
 *
 * The cpu_mp_probe() should check to see if MP support is present and return
 * zero if it is not or non-zero if it is.  If MP support is present, then
 * cpu_mp_start() will be called so that MP can be enabled.  This function
 * should do things such as startup secondary processors.  It should also
 * setup mp_ncpus, all_cpus, and smp_cpus.  It should also ensure that
 * smp_started is initialized at the appropriate time.
 * Once cpu_mp_start() returns, machine independent MP startup code will be
 * executed and a simple message will be output to the console.  Finally,
 * cpu_mp_announce() will be called so that machine dependent messages about
 * the MP support may be output to the console if desired.
 *
 * The cpu_setmaxid() function is called very early during the boot process
 * so that the MD code may set mp_maxid to provide an upper bound on CPU IDs
 * that other subsystems may use.  If a platform is not able to determine
 * the exact maximum ID that early, then it may set mp_maxid to MAXCPU - 1.
 */
struct thread;

struct cpu_group *cpu_topo(void);
void	cpu_mp_announce(void);
int	cpu_mp_probe(void);
void	cpu_mp_setmaxid(void);
void	cpu_mp_start(void);

void	forward_signal(struct thread *);
int	restart_cpus(cpuset_t);
int	stop_cpus(cpuset_t);
int	stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int	suspend_cpus(cpuset_t);
int	resume_cpus(cpuset_t);
#endif

void	smp_rendezvous_action(void);
extern	struct mtx smp_ipi_mtx;

#endif /* SMP */

int	quiesce_all_cpus(const char *, int);
int	quiesce_cpus(cpuset_t, const char *, int);
/*
 * smp_no_rendevous_barrier was renamed to smp_no_rendezvous_barrier
 * in __FreeBSD_version 1101508, with the old name remaining in 11.x
 * as an alias for compatibility.  The old name will be gone in 12.0
 * (__FreeBSD_version >= 1200028).
 */
void	smp_no_rendevous_barrier(void *);
void	smp_no_rendezvous_barrier(void *);
void	smp_rendezvous(void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
void	smp_rendezvous_cpus(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */