1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * CPU/APIC topology
4 *
5 * The APIC IDs describe the system topology in multiple domain levels.
6 * The CPUID topology parser provides the information which part of the
7 * APIC ID is associated to the individual levels:
8 *
9 * [PACKAGE][DIEGRP][DIE][TILE][MODULE][CORE][THREAD]
10 *
11 * The root space contains the package (socket) IDs.
12 *
13 * Not enumerated levels consume 0 bits space, but conceptually they are
14 * always represented. If e.g. only CORE and THREAD levels are enumerated
15 * then the DIE, MODULE and TILE have the same physical ID as the PACKAGE.
16 *
17 * If SMT is not supported, then the THREAD domain is still used. It then
18 * has the same physical ID as the CORE domain and is the only child of
19 * the core domain.
20 *
21 * This allows a unified view on the system independent of the enumerated
22 * domain levels without requiring any conditionals in the code.
23 */
24#define pr_fmt(fmt) "CPU topo: " fmt
25#include <linux/cpu.h>
26
27#include <xen/xen.h>
28
29#include <asm/apic.h>
30#include <asm/hypervisor.h>
31#include <asm/io_apic.h>
32#include <asm/mpspec.h>
33#include <asm/smp.h>
34
35#include "cpu.h"
36
37/*
38 * Map cpu index to physical APIC ID
39 */
40DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_apicid, BAD_APICID);
41DEFINE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid, CPU_ACPIID_INVALID);
42EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
43EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_acpiid);
44
45/* Bitmap of physically present CPUs. */
46DECLARE_BITMAP(phys_cpu_present_map, MAX_LOCAL_APIC) __read_mostly;
47
48/* Used for CPU number allocation and parallel CPU bringup */
49u32 cpuid_to_apicid[] __ro_after_init = { [0 ... NR_CPUS - 1] = BAD_APICID, };
50
51/* Bitmaps to mark registered APICs at each topology domain */
52static struct { DECLARE_BITMAP(map, MAX_LOCAL_APIC); } apic_maps[TOPO_MAX_DOMAIN] __ro_after_init;
53
54/*
55 * Keep track of assigned, disabled and rejected CPUs. Present assigned
56 * with 1 as CPU #0 is reserved for the boot CPU.
57 */
58static struct {
59	unsigned int		nr_assigned_cpus;
60	unsigned int		nr_disabled_cpus;
61	unsigned int		nr_rejected_cpus;
62	u32			boot_cpu_apic_id;
63	u32			real_bsp_apic_id;
64} topo_info __ro_after_init = {
65	.nr_assigned_cpus	= 1,
66	.boot_cpu_apic_id	= BAD_APICID,
67	.real_bsp_apic_id	= BAD_APICID,
68};
69
70#define domain_weight(_dom)	bitmap_weight(apic_maps[_dom].map, MAX_LOCAL_APIC)
71
72bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
73{
74	return phys_id == (u64)cpuid_to_apicid[cpu];
75}
76
#ifdef CONFIG_SMP
/*
 * Set @cpu in __cpu_primary_thread_mask when none of the APIC ID bits
 * selected by (__max_threads_per_core - 1) is set.
 *
 * NOTE(review): the mask arithmetic presumably relies on
 * __max_threads_per_core being a power of two - confirm.
 */
static void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid)
{
	unsigned int thread_bits = __max_threads_per_core - 1;

	if (apicid & thread_bits)
		return;

	cpumask_set_cpu(cpu, &__cpu_primary_thread_mask);
}
#else
/* No primary thread tracking without CONFIG_SMP */
static inline void cpu_mark_primary_thread(unsigned int cpu, unsigned int apicid) { }
#endif
86
87/*
88 * Convert the APIC ID to a domain level ID by masking out the low bits
89 * below the domain level @dom.
90 */
91static inline u32 topo_apicid(u32 apicid, enum x86_topology_domains dom)
92{
93	if (dom == TOPO_SMT_DOMAIN)
94		return apicid;
95	return apicid & (UINT_MAX << x86_topo_system.dom_shifts[dom - 1]);
96}
97
98static int topo_lookup_cpuid(u32 apic_id)
99{
100	int i;
101
102	/* CPU# to APICID mapping is persistent once it is established */
103	for (i = 0; i < topo_info.nr_assigned_cpus; i++) {
104		if (cpuid_to_apicid[i] == apic_id)
105			return i;
106	}
107	return -ENODEV;
108}
109
110static __init int topo_get_cpunr(u32 apic_id)
111{
112	int cpu = topo_lookup_cpuid(apic_id);
113
114	if (cpu >= 0)
115		return cpu;
116
117	return topo_info.nr_assigned_cpus++;
118}
119
120static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
121{
122#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
123	early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
124	early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
125#endif
126	set_cpu_present(cpu, true);
127}
128
129static __init bool check_for_real_bsp(u32 apic_id)
130{
131	/*
132	 * There is no real good way to detect whether this a kdump()
133	 * kernel, but except on the Voyager SMP monstrosity which is not
134	 * longer supported, the real BSP APIC ID is the first one which is
135	 * enumerated by firmware. That allows to detect whether the boot
136	 * CPU is the real BSP. If it is not, then do not register the APIC
137	 * because sending INIT to the real BSP would reset the whole
138	 * system.
139	 *
140	 * The first APIC ID which is enumerated by firmware is detectable
141	 * because the boot CPU APIC ID is registered before that without
142	 * invoking this code.
143	 */
144	if (topo_info.real_bsp_apic_id != BAD_APICID)
145		return false;
146
147	if (apic_id == topo_info.boot_cpu_apic_id) {
148		topo_info.real_bsp_apic_id = apic_id;
149		return false;
150	}
151
152	pr_warn("Boot CPU APIC ID not the first enumerated APIC ID: %x > %x\n",
153		topo_info.boot_cpu_apic_id, apic_id);
154	pr_warn("Crash kernel detected. Disabling real BSP to prevent machine INIT\n");
155
156	topo_info.real_bsp_apic_id = apic_id;
157	return true;
158}
159
160static unsigned int topo_unit_count(u32 lvlid, enum x86_topology_domains at_level,
161				    unsigned long *map)
162{
163	unsigned int id, end, cnt = 0;
164
165	/* Calculate the exclusive end */
166	end = lvlid + (1U << x86_topo_system.dom_shifts[at_level]);
167
168	/* Unfortunately there is no bitmap_weight_range() */
169	for (id = find_next_bit(map, end, lvlid); id < end; id = find_next_bit(map, end, ++id))
170		cnt++;
171	return cnt;
172}
173
174static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
175{
176	int cpu, dom;
177
178	if (present) {
179		set_bit(apic_id, phys_cpu_present_map);
180
181		/*
182		 * Double registration is valid in case of the boot CPU
183		 * APIC because that is registered before the enumeration
184		 * of the APICs via firmware parsers or VM guest
185		 * mechanisms.
186		 */
187		if (apic_id == topo_info.boot_cpu_apic_id)
188			cpu = 0;
189		else
190			cpu = topo_get_cpunr(apic_id);
191
192		cpuid_to_apicid[cpu] = apic_id;
193		topo_set_cpuids(cpu, apic_id, acpi_id);
194	} else {
195		u32 pkgid = topo_apicid(apic_id, TOPO_PKG_DOMAIN);
196
197		/*
198		 * Check for present APICs in the same package when running
199		 * on bare metal. Allow the bogosity in a guest.
200		 */
201		if (hypervisor_is_type(X86_HYPER_NATIVE) &&
202		    topo_unit_count(pkgid, TOPO_PKG_DOMAIN, phys_cpu_present_map)) {
203			pr_info_once("Ignoring hot-pluggable APIC ID %x in present package.\n",
204				     apic_id);
205			topo_info.nr_rejected_cpus++;
206			return;
207		}
208
209		topo_info.nr_disabled_cpus++;
210	}
211
212	/*
213	 * Register present and possible CPUs in the domain
214	 * maps. cpu_possible_map will be updated in
215	 * topology_init_possible_cpus() after enumeration is done.
216	 */
217	for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
218		set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
219}
220
221/**
222 * topology_register_apic - Register an APIC in early topology maps
223 * @apic_id:	The APIC ID to set up
224 * @acpi_id:	The ACPI ID associated to the APIC
225 * @present:	True if the corresponding CPU is present
226 */
227void __init topology_register_apic(u32 apic_id, u32 acpi_id, bool present)
228{
229	if (apic_id >= MAX_LOCAL_APIC) {
230		pr_err_once("APIC ID %x exceeds kernel limit of: %x\n", apic_id, MAX_LOCAL_APIC - 1);
231		topo_info.nr_rejected_cpus++;
232		return;
233	}
234
235	if (check_for_real_bsp(apic_id)) {
236		topo_info.nr_rejected_cpus++;
237		return;
238	}
239
240	/* CPU numbers exhausted? */
241	if (apic_id != topo_info.boot_cpu_apic_id && topo_info.nr_assigned_cpus >= nr_cpu_ids) {
242		pr_warn_once("CPU limit of %d reached. Ignoring further CPUs\n", nr_cpu_ids);
243		topo_info.nr_rejected_cpus++;
244		return;
245	}
246
247	topo_register_apic(apic_id, acpi_id, present);
248}
249
250/**
251 * topology_register_boot_apic - Register the boot CPU APIC
252 * @apic_id:	The APIC ID to set up
253 *
254 * Separate so CPU #0 can be assigned
255 */
256void __init topology_register_boot_apic(u32 apic_id)
257{
258	WARN_ON_ONCE(topo_info.boot_cpu_apic_id != BAD_APICID);
259
260	topo_info.boot_cpu_apic_id = apic_id;
261	topo_register_apic(apic_id, CPU_ACPIID_INVALID, true);
262}
263
264/**
265 * topology_get_logical_id - Retrieve the logical ID at a given topology domain level
266 * @apicid:		The APIC ID for which to lookup the logical ID
267 * @at_level:		The topology domain level to use
268 *
269 * @apicid must be a full APIC ID, not the normalized variant. It's valid to have
270 * all bits below the domain level specified by @at_level to be clear. So both
271 * real APIC IDs and backshifted normalized APIC IDs work correctly.
272 *
273 * Returns:
274 *  - >= 0:	The requested logical ID
275 *  - -ERANGE:	@apicid is out of range
276 *  - -ENODEV:	@apicid is not registered
277 */
278int topology_get_logical_id(u32 apicid, enum x86_topology_domains at_level)
279{
280	/* Remove the bits below @at_level to get the proper level ID of @apicid */
281	unsigned int lvlid = topo_apicid(apicid, at_level);
282
283	if (lvlid >= MAX_LOCAL_APIC)
284		return -ERANGE;
285	if (!test_bit(lvlid, apic_maps[at_level].map))
286		return -ENODEV;
287	/* Get the number of set bits before @lvlid. */
288	return bitmap_weight(apic_maps[at_level].map, lvlid);
289}
290EXPORT_SYMBOL_GPL(topology_get_logical_id);
291
292/**
293 * topology_unit_count - Retrieve the count of specified units at a given topology domain level
294 * @apicid:		The APIC ID which specifies the search range
295 * @which_units:	The domain level specifying the units to count
296 * @at_level:		The domain level at which @which_units have to be counted
297 *
298 * This returns the number of possible units according to the enumerated
299 * information.
300 *
301 * E.g. topology_count_units(apicid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN)
302 * counts the number of possible cores in the package to which @apicid
303 * belongs.
304 *
305 * @at_level must obviously be greater than @which_level to produce useful
306 * results.  If @at_level is equal to @which_units the result is
307 * unsurprisingly 1. If @at_level is less than @which_units the results
308 * is by definition undefined and the function returns 0.
309 */
310unsigned int topology_unit_count(u32 apicid, enum x86_topology_domains which_units,
311				 enum x86_topology_domains at_level)
312{
313	/* Remove the bits below @at_level to get the proper level ID of @apicid */
314	unsigned int lvlid = topo_apicid(apicid, at_level);
315
316	if (lvlid >= MAX_LOCAL_APIC)
317		return 0;
318	if (!test_bit(lvlid, apic_maps[at_level].map))
319		return 0;
320	if (which_units > at_level)
321		return 0;
322	if (which_units == at_level)
323		return 1;
324	return topo_unit_count(lvlid, at_level, apic_maps[which_units].map);
325}
326
327#ifdef CONFIG_ACPI_HOTPLUG_CPU
328/**
329 * topology_hotplug_apic - Handle a physical hotplugged APIC after boot
330 * @apic_id:	The APIC ID to set up
331 * @acpi_id:	The ACPI ID associated to the APIC
332 */
333int topology_hotplug_apic(u32 apic_id, u32 acpi_id)
334{
335	int cpu;
336
337	if (apic_id >= MAX_LOCAL_APIC)
338		return -EINVAL;
339
340	/* Reject if the APIC ID was not registered during enumeration. */
341	if (!test_bit(apic_id, apic_maps[TOPO_SMT_DOMAIN].map))
342		return -ENODEV;
343
344	cpu = topo_lookup_cpuid(apic_id);
345	if (cpu < 0)
346		return -ENOSPC;
347
348	set_bit(apic_id, phys_cpu_present_map);
349	topo_set_cpuids(cpu, apic_id, acpi_id);
350	cpu_mark_primary_thread(cpu, apic_id);
351	return cpu;
352}
353
354/**
355 * topology_hotunplug_apic - Remove a physical hotplugged APIC after boot
356 * @cpu:	The CPU number for which the APIC ID is removed
357 */
358void topology_hotunplug_apic(unsigned int cpu)
359{
360	u32 apic_id = cpuid_to_apicid[cpu];
361
362	if (apic_id == BAD_APICID)
363		return;
364
365	per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
366	clear_bit(apic_id, phys_cpu_present_map);
367	set_cpu_present(cpu, false);
368}
369#endif
370
371#ifdef CONFIG_X86_LOCAL_APIC
372static unsigned int max_possible_cpus __initdata = NR_CPUS;
373
374/**
375 * topology_apply_cmdline_limits_early - Apply topology command line limits early
376 *
377 * Ensure that command line limits are in effect before firmware parsing
378 * takes place.
379 */
380void __init topology_apply_cmdline_limits_early(void)
381{
382	unsigned int possible = nr_cpu_ids;
383
384	/* 'maxcpus=0' 'nosmp' 'nolapic' 'disableapic' 'noapic' */
385	if (!setup_max_cpus || ioapic_is_disabled || apic_is_disabled)
386		possible = 1;
387
388	/* 'possible_cpus=N' */
389	possible = min_t(unsigned int, max_possible_cpus, possible);
390
391	if (possible < nr_cpu_ids) {
392		pr_info("Limiting to %u possible CPUs\n", possible);
393		set_nr_cpu_ids(possible);
394	}
395}
396
397static __init bool restrict_to_up(void)
398{
399	if (!smp_found_config || ioapic_is_disabled)
400		return true;
401	/*
402	 * XEN PV is special as it does not advertise the local APIC
403	 * properly, but provides a fake topology for it so that the
404	 * infrastructure works. So don't apply the restrictions vs. APIC
405	 * here.
406	 */
407	if (xen_pv_domain())
408		return false;
409
410	return apic_is_disabled;
411}
412
413void __init topology_init_possible_cpus(void)
414{
415	unsigned int assigned = topo_info.nr_assigned_cpus;
416	unsigned int disabled = topo_info.nr_disabled_cpus;
417	unsigned int cnta, cntb, cpu, allowed = 1;
418	unsigned int total = assigned + disabled;
419	u32 apicid, firstid;
420
421	/*
422	 * If there was no APIC registered, then fake one so that the
423	 * topology bitmap is populated. That ensures that the code below
424	 * is valid and the various query interfaces can be used
425	 * unconditionally. This does not affect the actual APIC code in
426	 * any way because either the local APIC address has not been
427	 * registered or the local APIC was disabled on the command line.
428	 */
429	if (topo_info.boot_cpu_apic_id == BAD_APICID)
430		topology_register_boot_apic(0);
431
432	if (!restrict_to_up()) {
433		if (WARN_ON_ONCE(assigned > nr_cpu_ids)) {
434			disabled += assigned - nr_cpu_ids;
435			assigned = nr_cpu_ids;
436		}
437		allowed = min_t(unsigned int, total, nr_cpu_ids);
438	}
439
440	if (total > allowed)
441		pr_warn("%u possible CPUs exceed the limit of %u\n", total, allowed);
442
443	assigned = min_t(unsigned int, allowed, assigned);
444	disabled = allowed - assigned;
445
446	topo_info.nr_assigned_cpus = assigned;
447	topo_info.nr_disabled_cpus = disabled;
448
449	total_cpus = allowed;
450	set_nr_cpu_ids(allowed);
451
452	cnta = domain_weight(TOPO_PKG_DOMAIN);
453	cntb = domain_weight(TOPO_DIE_DOMAIN);
454	__max_logical_packages = cnta;
455	__max_dies_per_package = 1U << (get_count_order(cntb) - get_count_order(cnta));
456
457	pr_info("Max. logical packages: %3u\n", cnta);
458	pr_info("Max. logical dies:     %3u\n", cntb);
459	pr_info("Max. dies per package: %3u\n", __max_dies_per_package);
460
461	cnta = domain_weight(TOPO_CORE_DOMAIN);
462	cntb = domain_weight(TOPO_SMT_DOMAIN);
463	/*
464	 * Can't use order delta here as order(cnta) can be equal
465	 * order(cntb) even if cnta != cntb.
466	 */
467	__max_threads_per_core = DIV_ROUND_UP(cntb, cnta);
468	pr_info("Max. threads per core: %3u\n", __max_threads_per_core);
469
470	firstid = find_first_bit(apic_maps[TOPO_SMT_DOMAIN].map, MAX_LOCAL_APIC);
471	__num_cores_per_package = topology_unit_count(firstid, TOPO_CORE_DOMAIN, TOPO_PKG_DOMAIN);
472	pr_info("Num. cores per package:   %3u\n", __num_cores_per_package);
473	__num_threads_per_package = topology_unit_count(firstid, TOPO_SMT_DOMAIN, TOPO_PKG_DOMAIN);
474	pr_info("Num. threads per package: %3u\n", __num_threads_per_package);
475
476	pr_info("Allowing %u present CPUs plus %u hotplug CPUs\n", assigned, disabled);
477	if (topo_info.nr_rejected_cpus)
478		pr_info("Rejected CPUs %u\n", topo_info.nr_rejected_cpus);
479
480	init_cpu_present(cpumask_of(0));
481	init_cpu_possible(cpumask_of(0));
482
483	/* Assign CPU numbers to non-present CPUs */
484	for (apicid = 0; disabled; disabled--, apicid++) {
485		apicid = find_next_andnot_bit(apic_maps[TOPO_SMT_DOMAIN].map, phys_cpu_present_map,
486					      MAX_LOCAL_APIC, apicid);
487		if (apicid >= MAX_LOCAL_APIC)
488			break;
489		cpuid_to_apicid[topo_info.nr_assigned_cpus++] = apicid;
490	}
491
492	for (cpu = 0; cpu < allowed; cpu++) {
493		apicid = cpuid_to_apicid[cpu];
494
495		set_cpu_possible(cpu, true);
496
497		if (apicid == BAD_APICID)
498			continue;
499
500		cpu_mark_primary_thread(cpu, apicid);
501		set_cpu_present(cpu, test_bit(apicid, phys_cpu_present_map));
502	}
503}
504
505/*
506 * Late SMP disable after sizing CPU masks when APIC/IOAPIC setup failed.
507 */
508void __init topology_reset_possible_cpus_up(void)
509{
510	init_cpu_present(cpumask_of(0));
511	init_cpu_possible(cpumask_of(0));
512
513	bitmap_zero(phys_cpu_present_map, MAX_LOCAL_APIC);
514	if (topo_info.boot_cpu_apic_id != BAD_APICID)
515		set_bit(topo_info.boot_cpu_apic_id, phys_cpu_present_map);
516}
517
518static int __init setup_possible_cpus(char *str)
519{
520	get_option(&str, &max_possible_cpus);
521	return 0;
522}
523early_param("possible_cpus", setup_possible_cpus);
524#endif
525