1139825Simp/*-
225164Speter * ----------------------------------------------------------------------------
325164Speter * "THE BEER-WARE LICENSE" (Revision 42):
425164Speter * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
525164Speter * can do whatever you want with this stuff. If we meet some day, and you think
625164Speter * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
725164Speter * ----------------------------------------------------------------------------
825164Speter *
950477Speter * $FreeBSD: stable/11/sys/sys/smp.h 331909 2018-04-03 07:31:22Z avg $
1025164Speter */
1125164Speter
1276078Sjhb#ifndef _SYS_SMP_H_
1376078Sjhb#define _SYS_SMP_H_
1425164Speter
1555205Speter#ifdef _KERNEL
1625164Speter
1776078Sjhb#ifndef LOCORE
1825517Sfsmp
19222813Sattilio#include <sys/cpuset.h>
20297558Savg#include <sys/queue.h>
21222813Sattilio
22117005Sjeff/*
23297558Savg * Types of nodes in the topological tree.
24297558Savg */
25297558Savgtypedef enum {
26297558Savg	/* No node has this type; can be used in topo API calls. */
27297558Savg	TOPO_TYPE_DUMMY,
28297558Savg	/* Processing unit aka computing unit aka logical CPU. */
29297558Savg	TOPO_TYPE_PU,
30297558Savg	/* Physical subdivision of a package. */
31297558Savg	TOPO_TYPE_CORE,
32297558Savg	/* CPU L1/L2/L3 cache. */
33297558Savg	TOPO_TYPE_CACHE,
34297558Savg	/* Package aka chip, equivalent to socket. */
35297558Savg	TOPO_TYPE_PKG,
36297558Savg	/* NUMA node. */
37297558Savg	TOPO_TYPE_NODE,
38297558Savg	/* Other logical or physical grouping of PUs. */
39297558Savg	/* E.g. PUs on the same die, or PUs sharing an FPU. */
40297558Savg	TOPO_TYPE_GROUP,
41297558Savg	/* The whole system. */
42297558Savg	TOPO_TYPE_SYSTEM
43297558Savg} topo_node_type;
44297558Savg
45297558Savg/* Hardware identifier of a topology component. */
46297558Savgtypedef	unsigned int hwid_t;
47297558Savg/* Logical CPU identifier. */
48297558Savgtypedef	int cpuid_t;
49297558Savg
50297558Savg/* A node in the topology. */
/*
 * Field notes.  Where the intent is only suggested by a field's name it is
 * marked "presumably" and should be confirmed against the topo_*()
 * implementation.
 *
 *   parent     - parent node.
 *   children   - TAILQ head of topo_node entries.
 *   siblings   - this node's TAILQ entry, presumably on parent->children.
 *   cpuset     - set of CPUs associated with this node.
 *   type       - kind of node, one of the TOPO_TYPE_* values.
 *   subtype    - additional qualifier for type; its meaning is not defined
 *                in this header.
 *   hwid       - hardware identifier of the component.
 *   id         - logical CPU identifier; see topo_set_pu_id().
 *   nchildren  - presumably the number of nodes on children.
 *   cpu_count  - presumably the number of CPUs in cpuset.
 */
51297558Savgstruct topo_node {
52297558Savg	struct topo_node			*parent;
53297558Savg	TAILQ_HEAD(topo_children, topo_node)	children;
54297558Savg	TAILQ_ENTRY(topo_node)			siblings;
55297558Savg	cpuset_t				cpuset;
56297558Savg	topo_node_type				type;
57297558Savg	uintptr_t				subtype;
58297558Savg	hwid_t					hwid;
59297558Savg	cpuid_t					id;
60297558Savg	int					nchildren;
61297558Savg	int					cpu_count;
62297558Savg};
63297558Savg
64297558Savg/*
65297558Savg * Scheduling topology of a NUMA or SMP system.
66117005Sjeff *
67117005Sjeff * The top level topology is an array of pointers to groups.  Each group
68117005Sjeff * contains a bitmask of cpus in its group or subgroups.  It may also
69117005Sjeff * contain a pointer to an array of child groups.
70117005Sjeff *
71117005Sjeff * The bitmasks at non leaf groups may be used by consumers who support
72117005Sjeff * a smaller depth than the hardware provides.
73117005Sjeff *
74117005Sjeff * The topology may be omitted by systems where all CPUs are equal.
75117005Sjeff */
76117005Sjeff
77117005Sjeffstruct cpu_group {
78176734Sjeff	struct cpu_group *cg_parent;	/* Our parent group. */
79176734Sjeff	struct cpu_group *cg_child;	/* Optional children groups. */
80222813Sattilio	cpuset_t	cg_mask;	/* Mask of cpus in this group. */
81222200Sattilio	int32_t		cg_count;	/* Count of cpus in this group. */
82222200Sattilio	int16_t		cg_children;	/* Number of children groups. */
83176734Sjeff	int8_t		cg_level;	/* Shared cache level (CG_SHARE_*). */
84176734Sjeff	int8_t		cg_flags;	/* Traversal modifiers (CG_FLAG_*). */
85117005Sjeff};
86117005Sjeff
/* Shorthand for a pointer to a group; note the typedef hides the '*'. */
87215159Snwhitehorntypedef struct cpu_group *cpu_group_t;
88215159Snwhitehorn
89176734Sjeff/*
90176734Sjeff * Defines common resources for CPUs in the group.  The highest level
91176734Sjeff * resource should be used when multiple are shared.
92176734Sjeff */
93176734Sjeff#define	CG_SHARE_NONE	0
94176734Sjeff#define	CG_SHARE_L1	1
95176734Sjeff#define	CG_SHARE_L2	2
96176734Sjeff#define	CG_SHARE_L3	3
97117005Sjeff
/* Highest cache level that has a CG_SHARE_* value. */
98297558Savg#define MAX_CACHE_LEVELS	CG_SHARE_L3
99297558Savg
100176734Sjeff/*
101176734Sjeff * Behavior modifiers for load balancing and affinity.
102176734Sjeff */
103176734Sjeff#define	CG_FLAG_HTT	0x01		/* Schedule the alternate core last. */
104191643Sjeff#define	CG_FLAG_SMT	0x02		/* New age htt, less crippled. */
105191643Sjeff#define	CG_FLAG_THREAD	(CG_FLAG_HTT | CG_FLAG_SMT)	/* Any threading. */
106176734Sjeff
107176734Sjeff/*
108297558Savg * Convenience routines for building and traversing topologies.
109176734Sjeff */
110215159Snwhitehorn#ifdef SMP
/*
 * NOTE(review): topo_add_node_by_hwid() and topo_find_node_by_hwid() take
 * the hardware id as a plain int, while struct topo_node stores it as
 * hwid_t (unsigned int).  Confirm that the implicit signed/unsigned
 * conversion is intended before relying on ids above INT_MAX.
 *
 * topo_analyze() receives three int pointers (pkg_count, cores_per_pkg,
 * thrs_per_core); presumably these are outputs -- verify against the
 * implementation.
 */
111297558Savgvoid topo_init_node(struct topo_node *node);
112297558Savgvoid topo_init_root(struct topo_node *root);
113297558Savgstruct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
114297558Savg    topo_node_type type, uintptr_t subtype);
115297558Savgstruct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
116297558Savg    topo_node_type type, uintptr_t subtype);
117297558Savgvoid topo_promote_child(struct topo_node *child);
118297558Savgstruct topo_node * topo_next_node(struct topo_node *top,
119297558Savg    struct topo_node *node);
120297558Savgstruct topo_node * topo_next_nonchild_node(struct topo_node *top,
121297558Savg    struct topo_node *node);
122297558Savgvoid topo_set_pu_id(struct topo_node *node, cpuid_t id);
123297558Savgint topo_analyze(struct topo_node *topo_root, int all, int *pkg_count,
124297558Savg    int *cores_per_pkg, int *thrs_per_core);
125297558Savg
/*
 * Iterate i over the nodes obtained by starting at root and repeatedly
 * calling topo_next_node(root, i) until it returns NULL.  Both arguments
 * are expanded more than once, so they must be free of side effects.
 * Parameters are parenthesized, matching CPU_FOREACH().
 */
#define	TOPO_FOREACH(i, root)	\
	for ((i) = (root); (i) != NULL; (i) = topo_next_node((root), (i)))
128297558Savg
/*
 * Scheduling-topology (struct cpu_group) helpers.  Each returns a
 * struct cpu_group pointer; ownership and lifetime of the returned groups
 * are not visible in this header -- see the implementation.
 */
129176734Sjeffstruct cpu_group *smp_topo(void);
130297558Savgstruct cpu_group *smp_topo_alloc(u_int count);
131176734Sjeffstruct cpu_group *smp_topo_none(void);
132176734Sjeffstruct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
133176734Sjeffstruct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
134176734Sjeff    int l1count, int l1flags);
135176734Sjeffstruct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
136176734Sjeff
13776078Sjhbextern void (*cpustop_restartfunc)(void);
13876078Sjhbextern int smp_cpus;
139331909Savg/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
140331909Savgextern volatile cpuset_t resuming_cpus;	/* woken up cpus in suspend pen */
141331909Savgextern volatile cpuset_t started_cpus;	/* cpus to let out of stop pen */
142331909Savgextern volatile cpuset_t stopped_cpus;	/* cpus in stop pen */
143331909Savgextern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
144331909Savgextern volatile cpuset_t toresume_cpus;	/* cpus to let out of suspend pen */
145331909Savgextern cpuset_t hlt_cpus_mask;		/* XXX 'mask' is detail in old impl */
146222813Sattilioextern cpuset_t logical_cpus_mask;
147123125Sjhb#endif /* SMP */
148123125Sjhb
14991673Sjeffextern u_int mp_maxid;			/* Upper bound on CPU IDs. */
150134689Sjulianextern int mp_maxcpus;
151123125Sjhbextern int mp_ncpus;
152123125Sjhbextern volatile int smp_started;
15327728Sfsmp
154222813Sattilioextern cpuset_t all_cpus;		/* CPUs that CPU_ABSENT() reports present. */
155276829Sjhbextern cpuset_t cpuset_domain[MAXMEMDOM]; 	/* CPUs in each NUMA domain. */
156134591Sjulian
15727002Sfsmp/*
15880779Sbmilekic * Macro allowing us to determine whether a CPU is absent at any given
15980779Sbmilekic * time, thus permitting us to configure sparse maps of cpuid-dependent
16080779Sbmilekic * (per-CPU) structures.
 *
 * Evaluates to non-zero when x_cpu is not a member of all_cpus.
16180779Sbmilekic */
162222813Sattilio#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))
16380779Sbmilekic
/*
 * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
 * integer iterator and iterates over the available set of CPUs.
 * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
 * returns the id of the next non-absent CPU.  It will wrap back to
 * CPU_FIRST() once the end of the list is reached.  CPU_FIRST() and
 * CPU_NEXT() are currently implemented via inline functions.
 *
 * CPU_FOREACH() expands to "for (...) if (absent) { } else", so the
 * statement supplied by the caller becomes the else branch.  The macro's
 * internal if is therefore already complete and cannot capture an else
 * that the caller wrote for an enclosing if.
 */
#define	CPU_FOREACH(i)							\
	for ((i) = 0; (i) <= mp_maxid; (i)++)				\
		if (CPU_ABSENT((i))) {					\
		} else
175209050Sjhb
/*
 * Return the lowest CPU id, counting up from 0, for which CPU_ABSENT()
 * is false.  The scan has no upper bound of its own: it ends only when a
 * present CPU is found.
 */
static __inline int
cpu_first(void)
{
	int cpu;

	cpu = 0;
	while (CPU_ABSENT(cpu))
		cpu++;
	return (cpu);
}
185209050Sjhb
186209050Sjhbstatic __inline int
187209050Sjhbcpu_next(int i)
188209050Sjhb{
189209050Sjhb
190209050Sjhb	for (;;) {
191209050Sjhb		i++;
192209050Sjhb		if (i > mp_maxid)
193209050Sjhb			i = 0;
194209050Sjhb		if (!CPU_ABSENT(i))
195209050Sjhb			return (i);
196209050Sjhb	}
197209050Sjhb}
198209050Sjhb
199209050Sjhb#define	CPU_FIRST()	cpu_first()
200209050Sjhb#define	CPU_NEXT(i)	cpu_next((i))
201209050Sjhb
202123125Sjhb#ifdef SMP
20380779Sbmilekic/*
20476078Sjhb * Machine dependent functions used to initialize MP support.
20576078Sjhb *
20676078Sjhb * The cpu_mp_probe() should check to see if MP support is present and return
20776078Sjhb * zero if it is not or non-zero if it is.  If MP support is present, then
20876078Sjhb * cpu_mp_start() will be called so that MP can be enabled.  This function
20976078Sjhb * should do things such as startup secondary processors.  It should also
21076078Sjhb * setup mp_ncpus, all_cpus, and smp_cpus.  It should also ensure that
211264984Sscottl * smp_started is initialized at the appropriate time.
21276078Sjhb * Once cpu_mp_start() returns, machine independent MP startup code will be
21376078Sjhb * executed and a simple message will be output to the console.  Finally,
21476078Sjhb * cpu_mp_announce() will be called so that machine dependent messages about
21576078Sjhb * the MP support may be output to the console if desired.
216122947Sjhb *
217122947Sjhb * The cpu_mp_setmaxid() function is called very early during the boot process
218122947Sjhb * so that the MD code may set mp_maxid to provide an upper bound on CPU IDs
219122947Sjhb * that other subsystems may use.  If a platform is not able to determine
220123126Sjhb * the exact maximum ID that early, then it may set mp_maxid to MAXCPU - 1.
22127002Sfsmp */
/* Forward declaration; only a pointer to struct thread is used below. */
22296999Sjakestruct thread;
22396999Sjake
224176734Sjeffstruct cpu_group *cpu_topo(void);
22576078Sjhbvoid	cpu_mp_announce(void);
22676078Sjhbint	cpu_mp_probe(void);
227122947Sjhbvoid	cpu_mp_setmaxid(void);
22876078Sjhbvoid	cpu_mp_start(void);
22927002Sfsmp
23083366Sjulianvoid	forward_signal(struct thread *);
231222813Sattilioint	restart_cpus(cpuset_t);
232222813Sattilioint	stop_cpus(cpuset_t);
233222813Sattilioint	stop_cpus_hard(cpuset_t);
/* suspend_cpus() and resume_cpus() are declared only for amd64 and i386. */
234235622Siwasaki#if defined(__amd64__) || defined(__i386__)
235222813Sattilioint	suspend_cpus(cpuset_t);
236255726Sgibbsint	resume_cpus(cpuset_t);
237189903Sjkim#endif
238243046Sjeff
23976078Sjhbvoid	smp_rendezvous_action(void);
240134416Sobrienextern	struct mtx smp_ipi_mtx;
241145727Sdwhite
242123125Sjhb#endif /* SMP */
243243046Sjeff
244243046Sjeffint	quiesce_all_cpus(const char *, int);
245243046Sjeffint	quiesce_cpus(cpuset_t, const char *, int);
246328386Spkelsey/*
247328386Spkelsey * smp_no_rendevous_barrier was renamed to smp_no_rendezvous_barrier
248328386Spkelsey * in __FreeBSD_version 1101508, with the old name remaining in 11.x
249328386Spkelsey * as an alias for compatibility.  The old name will be gone in 12.0
250328386Spkelsey * (__FreeBSD_version >= 1200028).
251328386Spkelsey */
252179230Sjbvoid	smp_no_rendevous_barrier(void *);
253328386Spkelseyvoid	smp_no_rendezvous_barrier(void *);
/*
 * NOTE(review): smp_rendezvous() takes three callbacks plus one argument
 * pointer, each callback receiving a void *; smp_rendezvous_cpus() has the
 * same shape with a leading cpuset_t.  The role of each callback and the
 * order in which they run are not named in these prototypes -- see the
 * implementation.
 */
25476078Sjhbvoid	smp_rendezvous(void (*)(void *),
25576078Sjhb		       void (*)(void *),
25676078Sjhb		       void (*)(void *),
25776078Sjhb		       void *arg);
258222813Sattiliovoid	smp_rendezvous_cpus(cpuset_t,
259179230Sjb		       void (*)(void *),
260179230Sjb		       void (*)(void *),
261179230Sjb		       void (*)(void *),
262179230Sjb		       void *arg);
26327728Sfsmp#endif /* !LOCORE */
26455205Speter#endif /* _KERNEL */
26576078Sjhb#endif /* _SYS_SMP_H_ */
266