1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Hardware Feedback Interface Driver
4 *
5 * Copyright (c) 2021, Intel Corporation.
6 *
7 * Authors: Aubrey Li <aubrey.li@linux.intel.com>
8 *          Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
9 *
10 *
 * The Hardware Feedback Interface provides performance and energy efficiency
 * capability information for each CPU in the system. Depending on the processor
 * model, hardware may periodically update these capabilities as a result of
 * changes in the operating conditions (e.g., power limits or thermal
 * constraints). On other processor models, there is a single HFI update
 * at boot.
17 *
18 * This file provides functionality to process HFI updates and relay these
19 * updates to userspace.
20 */
21
22#define pr_fmt(fmt)  "intel-hfi: " fmt
23
24#include <linux/bitops.h>
25#include <linux/cpufeature.h>
26#include <linux/cpumask.h>
27#include <linux/delay.h>
28#include <linux/gfp.h>
29#include <linux/io.h>
30#include <linux/kernel.h>
31#include <linux/math.h>
32#include <linux/mutex.h>
33#include <linux/percpu-defs.h>
34#include <linux/printk.h>
35#include <linux/processor.h>
36#include <linux/slab.h>
37#include <linux/spinlock.h>
38#include <linux/suspend.h>
39#include <linux/string.h>
40#include <linux/syscore_ops.h>
41#include <linux/topology.h>
42#include <linux/workqueue.h>
43
44#include <asm/msr.h>
45
46#include "intel_hfi.h"
47#include "thermal_interrupt.h"
48
49#include "../thermal_netlink.h"
50
/*
 * Hardware Feedback Interface MSR configuration bits
 *
 * HW_FEEDBACK_PTR_VALID_BIT is OR-ed into the physical address of the table
 * that is written to MSR_IA32_HW_FEEDBACK_PTR.
 * HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT is set/cleared in
 * MSR_IA32_HW_FEEDBACK_CONFIG to enable/disable HFI.
 */
#define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)

/* CPUID detection and enumeration definitions for HFI */

/* CPUID leaf whose EDX register is decoded with union cpuid6_edx. */
#define CPUID_HFI_LEAF 6
58
/**
 * union hfi_capabilities - Capabilities enumerated for HFI
 * @split.performance:		Performance capability reporting is supported
 * @split.energy_efficiency:	Energy efficiency capability reporting is
 *				supported
 * @split.__reserved:		Reserved bits; cleared before counting the
 *				supported capabilities
 * @bits:			All capability bits viewed as a single byte
 */
union hfi_capabilities {
	struct {
		u8	performance:1;
		u8	energy_efficiency:1;
		u8	__reserved:6;
	} split;
	u8 bits;
};
67
/**
 * union cpuid6_edx - Decoded view of EDX from CPUID leaf CPUID_HFI_LEAF
 * @split.capabilities:	Supported HFI capabilities
 * @split.table_pages:	Encoded table size; the number of 4KB pages of the HFI
 *			table is this value plus one
 * @split.__reserved:	Reserved bits
 * @split.index:	Row of the executing CPU in its HFI table
 * @full:		Raw 32-bit register value
 */
union cpuid6_edx {
	struct {
		union hfi_capabilities	capabilities;
		u32			table_pages:4;
		u32			__reserved:4;
		s32			index:16;
	} split;
	u32 full;
};
77
/**
 * struct hfi_cpu_data - HFI capabilities per CPU
 * @perf_cap:		Performance capability
 * @ee_cap:		Energy efficiency capability
 *
 * Capabilities of a logical processor in the HFI table. These capabilities are
 * unitless. Each row of the table data is read through this structure; rows
 * are spaced hfi_features.cpu_stride bytes apart.
 */
struct hfi_cpu_data {
	u8	perf_cap;
	u8	ee_cap;
} __packed;
90
/**
 * struct hfi_hdr - Header of the HFI table
 * @perf_updated:	Hardware updated performance capabilities
 * @ee_updated:		Hardware updated energy efficiency capabilities
 *
 * Properties of the data in an HFI table. The header is located right after
 * the timestamp at the base of the table.
 */
struct hfi_hdr {
	u8	perf_updated;
	u8	ee_updated;
} __packed;
102
/**
 * struct hfi_instance - Representation of an HFI instance (i.e., a table)
 * @local_table:	Base of the local copy of the HFI table
 * @timestamp:		Timestamp of the last update of the local table.
 *			Located at the base of the local table.
 * @hdr:		Base address of the header of the local table. A
 *			non-NULL value marks the instance as initialized.
 * @data:		Base address of the data of the local table
 * @cpus:		CPUs represented in this HFI table instance
 * @hw_table:		Pointer to the HFI table of this instance
 * @update_work:	Delayed work to process HFI updates
 * @table_lock:		Lock to protect accesses to the table of this instance
 * @event_lock:		Lock to process HFI interrupts
 *
 * A set of parameters to parse and navigate a specific HFI table.
 */
struct hfi_instance {
	/* Both members alias the same address: the base of the local table. */
	union {
		void			*local_table;
		u64			*timestamp;
	};
	void			*hdr;
	void			*data;
	cpumask_var_t		cpus;
	void			*hw_table;
	struct delayed_work	update_work;
	raw_spinlock_t		table_lock;
	raw_spinlock_t		event_lock;
};
131
/**
 * struct hfi_features - Supported HFI features
 * @nr_table_pages:	Size of the HFI table in 4KB pages
 * @cpu_stride:		Stride size, in bytes, to locate the capability data of
 *			a logical processor within the table (i.e., row stride)
 * @hdr_size:		Size of the table header, in bytes
 *
 * Parameters and supported features that are common to all HFI instances.
 * They are filled in once by hfi_parse_features().
 */
struct hfi_features {
	size_t		nr_table_pages;
	unsigned int	cpu_stride;
	unsigned int	hdr_size;
};
146
/**
 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
 * @index:		Row of this CPU in its HFI table. A negative value means
 *			that the row has not been read from CPUID yet.
 * @hfi_instance:	Attributes of the HFI table to which this CPU belongs.
 *			NULL until the CPU is linked to an instance.
 *
 * Parameters to link a logical processor to an HFI table and a row within it.
 */
struct hfi_cpu_info {
	s16			index;
	struct hfi_instance	*hfi_instance;
};
158
/* Per-CPU link to an HFI instance; the row index starts out as unset (-1). */
static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };

/* Number of entries in, and base of, the array of HFI instances. */
static int max_hfi_instances;
static struct hfi_instance *hfi_instances;

/* Table geometry shared by all instances. */
static struct hfi_features hfi_features;
/* Serializes CPU online/offline handling and the reporting of updates. */
static DEFINE_MUTEX(hfi_instance_lock);

/* Workqueue on which the delayed work of each instance is queued. */
static struct workqueue_struct *hfi_updates_wq;
/* Delay, in jiffies, between receiving an update and processing it. */
#define HFI_UPDATE_INTERVAL		HZ
/* Maximum number of CPU capabilities sent in a single thermal netlink event. */
#define HFI_MAX_THERM_NOTIFY_COUNT	16
170
171static void get_hfi_caps(struct hfi_instance *hfi_instance,
172			 struct thermal_genl_cpu_caps *cpu_caps)
173{
174	int cpu, i = 0;
175
176	raw_spin_lock_irq(&hfi_instance->table_lock);
177	for_each_cpu(cpu, hfi_instance->cpus) {
178		struct hfi_cpu_data *caps;
179		s16 index;
180
181		index = per_cpu(hfi_cpu_info, cpu).index;
182		caps = hfi_instance->data + index * hfi_features.cpu_stride;
183		cpu_caps[i].cpu = cpu;
184
185		/*
186		 * Scale performance and energy efficiency to
187		 * the [0, 1023] interval that thermal netlink uses.
188		 */
189		cpu_caps[i].performance = caps->perf_cap << 2;
190		cpu_caps[i].efficiency = caps->ee_cap << 2;
191
192		++i;
193	}
194	raw_spin_unlock_irq(&hfi_instance->table_lock);
195}
196
197/*
198 * Call update_capabilities() when there are changes in the HFI table.
199 */
200static void update_capabilities(struct hfi_instance *hfi_instance)
201{
202	struct thermal_genl_cpu_caps *cpu_caps;
203	int i = 0, cpu_count;
204
205	/* CPUs may come online/offline while processing an HFI update. */
206	mutex_lock(&hfi_instance_lock);
207
208	cpu_count = cpumask_weight(hfi_instance->cpus);
209
210	/* No CPUs to report in this hfi_instance. */
211	if (!cpu_count)
212		goto out;
213
214	cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
215	if (!cpu_caps)
216		goto out;
217
218	get_hfi_caps(hfi_instance, cpu_caps);
219
220	if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
221		goto last_cmd;
222
223	/* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
224	for (i = 0;
225	     (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
226	     i += HFI_MAX_THERM_NOTIFY_COUNT)
227		thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
228						  &cpu_caps[i]);
229
230	cpu_count = cpu_count - i;
231
232last_cmd:
233	/* Process the remaining capabilities if any. */
234	if (cpu_count)
235		thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
236
237	kfree(cpu_caps);
238out:
239	mutex_unlock(&hfi_instance_lock);
240}
241
242static void hfi_update_work_fn(struct work_struct *work)
243{
244	struct hfi_instance *hfi_instance;
245
246	hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
247				    update_work);
248
249	update_capabilities(hfi_instance);
250}
251
252void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
253{
254	struct hfi_instance *hfi_instance;
255	int cpu = smp_processor_id();
256	struct hfi_cpu_info *info;
257	u64 new_timestamp, msr, hfi;
258
259	if (!pkg_therm_status_msr_val)
260		return;
261
262	info = &per_cpu(hfi_cpu_info, cpu);
263	if (!info)
264		return;
265
266	/*
267	 * A CPU is linked to its HFI instance before the thermal vector in the
268	 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
269	 * when receiving an HFI event.
270	 */
271	hfi_instance = info->hfi_instance;
272	if (unlikely(!hfi_instance)) {
273		pr_debug("Received event on CPU %d but instance was null", cpu);
274		return;
275	}
276
277	/*
278	 * On most systems, all CPUs in the package receive a package-level
279	 * thermal interrupt when there is an HFI update. It is sufficient to
280	 * let a single CPU to acknowledge the update and queue work to
281	 * process it. The remaining CPUs can resume their work.
282	 */
283	if (!raw_spin_trylock(&hfi_instance->event_lock))
284		return;
285
286	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
287	hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
288	if (!hfi) {
289		raw_spin_unlock(&hfi_instance->event_lock);
290		return;
291	}
292
293	/*
294	 * Ack duplicate update. Since there is an active HFI
295	 * status from HW, it must be a new event, not a case
296	 * where a lagging CPU entered the locked region.
297	 */
298	new_timestamp = *(u64 *)hfi_instance->hw_table;
299	if (*hfi_instance->timestamp == new_timestamp) {
300		thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
301		raw_spin_unlock(&hfi_instance->event_lock);
302		return;
303	}
304
305	raw_spin_lock(&hfi_instance->table_lock);
306
307	/*
308	 * Copy the updated table into our local copy. This includes the new
309	 * timestamp.
310	 */
311	memcpy(hfi_instance->local_table, hfi_instance->hw_table,
312	       hfi_features.nr_table_pages << PAGE_SHIFT);
313
314	/*
315	 * Let hardware know that we are done reading the HFI table and it is
316	 * free to update it again.
317	 */
318	thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
319
320	raw_spin_unlock(&hfi_instance->table_lock);
321	raw_spin_unlock(&hfi_instance->event_lock);
322
323	queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
324			   HFI_UPDATE_INTERVAL);
325}
326
327static void init_hfi_cpu_index(struct hfi_cpu_info *info)
328{
329	union cpuid6_edx edx;
330
331	/* Do not re-read @cpu's index if it has already been initialized. */
332	if (info->index > -1)
333		return;
334
335	edx.full = cpuid_edx(CPUID_HFI_LEAF);
336	info->index = edx.split.index;
337}
338
339/*
340 * The format of the HFI table depends on the number of capabilities that the
341 * hardware supports. Keep a data structure to navigate the table.
342 */
343static void init_hfi_instance(struct hfi_instance *hfi_instance)
344{
345	/* The HFI header is below the time-stamp. */
346	hfi_instance->hdr = hfi_instance->local_table +
347			    sizeof(*hfi_instance->timestamp);
348
349	/* The HFI data starts below the header. */
350	hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
351}
352
353/* Caller must hold hfi_instance_lock. */
354static void hfi_enable(void)
355{
356	u64 msr_val;
357
358	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
359	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
360	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
361}
362
363static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
364{
365	phys_addr_t hw_table_pa;
366	u64 msr_val;
367
368	hw_table_pa = virt_to_phys(hfi_instance->hw_table);
369	msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
370	wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
371}
372
373/* Caller must hold hfi_instance_lock. */
374static void hfi_disable(void)
375{
376	u64 msr_val;
377	int i;
378
379	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
380	msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
381	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
382
383	/*
384	 * Wait for hardware to acknowledge the disabling of HFI. Some
385	 * processors may not do it. Wait for ~2ms. This is a reasonable
386	 * time for hardware to complete any pending actions on the HFI
387	 * memory.
388	 */
389	for (i = 0; i < 2000; i++) {
390		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
391		if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED)
392			break;
393
394		udelay(1);
395		cpu_relax();
396	}
397}
398
399/**
400 * intel_hfi_online() - Enable HFI on @cpu
401 * @cpu:	CPU in which the HFI will be enabled
402 *
403 * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
404 * level. The first CPU in the die/package to come online does the full HFI
405 * initialization. Subsequent CPUs will just link themselves to the HFI
406 * instance of their die/package.
407 *
408 * This function is called before enabling the thermal vector in the local APIC
409 * in order to ensure that @cpu has an associated HFI instance when it receives
410 * an HFI event.
411 */
412void intel_hfi_online(unsigned int cpu)
413{
414	struct hfi_instance *hfi_instance;
415	struct hfi_cpu_info *info;
416	u16 die_id;
417
418	/* Nothing to do if hfi_instances are missing. */
419	if (!hfi_instances)
420		return;
421
422	/*
423	 * Link @cpu to the HFI instance of its package/die. It does not
424	 * matter whether the instance has been initialized.
425	 */
426	info = &per_cpu(hfi_cpu_info, cpu);
427	die_id = topology_logical_die_id(cpu);
428	hfi_instance = info->hfi_instance;
429	if (!hfi_instance) {
430		if (die_id >= max_hfi_instances)
431			return;
432
433		hfi_instance = &hfi_instances[die_id];
434		info->hfi_instance = hfi_instance;
435	}
436
437	init_hfi_cpu_index(info);
438
439	/*
440	 * Now check if the HFI instance of the package/die of @cpu has been
441	 * initialized (by checking its header). In such case, all we have to
442	 * do is to add @cpu to this instance's cpumask and enable the instance
443	 * if needed.
444	 */
445	mutex_lock(&hfi_instance_lock);
446	if (hfi_instance->hdr)
447		goto enable;
448
449	/*
450	 * Hardware is programmed with the physical address of the first page
451	 * frame of the table. Hence, the allocated memory must be page-aligned.
452	 *
453	 * Some processors do not forget the initial address of the HFI table
454	 * even after having been reprogrammed. Keep using the same pages. Do
455	 * not free them.
456	 */
457	hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
458						   GFP_KERNEL | __GFP_ZERO);
459	if (!hfi_instance->hw_table)
460		goto unlock;
461
462	/*
463	 * Allocate memory to keep a local copy of the table that
464	 * hardware generates.
465	 */
466	hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
467					    GFP_KERNEL);
468	if (!hfi_instance->local_table)
469		goto free_hw_table;
470
471	init_hfi_instance(hfi_instance);
472
473	INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
474	raw_spin_lock_init(&hfi_instance->table_lock);
475	raw_spin_lock_init(&hfi_instance->event_lock);
476
477enable:
478	cpumask_set_cpu(cpu, hfi_instance->cpus);
479
480	/* Enable this HFI instance if this is its first online CPU. */
481	if (cpumask_weight(hfi_instance->cpus) == 1) {
482		hfi_set_hw_table(hfi_instance);
483		hfi_enable();
484	}
485
486unlock:
487	mutex_unlock(&hfi_instance_lock);
488	return;
489
490free_hw_table:
491	free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
492	goto unlock;
493}
494
495/**
496 * intel_hfi_offline() - Disable HFI on @cpu
497 * @cpu:	CPU in which the HFI will be disabled
498 *
499 * Remove @cpu from those covered by its HFI instance.
500 *
501 * On some processors, hardware remembers previous programming settings even
502 * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
503 * die/package of @cpu are offline. See note in intel_hfi_online().
504 */
505void intel_hfi_offline(unsigned int cpu)
506{
507	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
508	struct hfi_instance *hfi_instance;
509
510	/*
511	 * Check if @cpu as an associated, initialized (i.e., with a non-NULL
512	 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
513	 * is present.
514	 */
515	hfi_instance = info->hfi_instance;
516	if (!hfi_instance)
517		return;
518
519	if (!hfi_instance->hdr)
520		return;
521
522	mutex_lock(&hfi_instance_lock);
523	cpumask_clear_cpu(cpu, hfi_instance->cpus);
524
525	if (!cpumask_weight(hfi_instance->cpus))
526		hfi_disable();
527
528	mutex_unlock(&hfi_instance_lock);
529}
530
531static __init int hfi_parse_features(void)
532{
533	unsigned int nr_capabilities;
534	union cpuid6_edx edx;
535
536	if (!boot_cpu_has(X86_FEATURE_HFI))
537		return -ENODEV;
538
539	/*
540	 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
541	 * supported capabilities and the size of the HFI table.
542	 */
543	edx.full = cpuid_edx(CPUID_HFI_LEAF);
544
545	if (!edx.split.capabilities.split.performance) {
546		pr_debug("Performance reporting not supported! Not using HFI\n");
547		return -ENODEV;
548	}
549
550	/*
551	 * The number of supported capabilities determines the number of
552	 * columns in the HFI table. Exclude the reserved bits.
553	 */
554	edx.split.capabilities.split.__reserved = 0;
555	nr_capabilities = hweight8(edx.split.capabilities.bits);
556
557	/* The number of 4KB pages required by the table */
558	hfi_features.nr_table_pages = edx.split.table_pages + 1;
559
560	/*
561	 * The header contains change indications for each supported feature.
562	 * The size of the table header is rounded up to be a multiple of 8
563	 * bytes.
564	 */
565	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
566
567	/*
568	 * Data of each logical processor is also rounded up to be a multiple
569	 * of 8 bytes.
570	 */
571	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
572
573	return 0;
574}
575
576static void hfi_do_enable(void)
577{
578	/* This code runs only on the boot CPU. */
579	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
580	struct hfi_instance *hfi_instance = info->hfi_instance;
581
582	/* No locking needed. There is no concurrency with CPU online. */
583	hfi_set_hw_table(hfi_instance);
584	hfi_enable();
585}
586
/*
 * Syscore suspend callback: disable HFI. Always returns 0.
 *
 * hfi_instance_lock is not taken because there is no concurrency with CPU
 * offline at this point.
 */
static int hfi_do_disable(void)
{
	hfi_disable();
	return 0;
}
594
/*
 * System-wide power management hooks: HFI is disabled on suspend and
 * re-enabled on resume. Registered from intel_hfi_init().
 */
static struct syscore_ops hfi_pm_ops = {
	.resume = hfi_do_enable,
	.suspend = hfi_do_disable,
};
599
600void __init intel_hfi_init(void)
601{
602	struct hfi_instance *hfi_instance;
603	int i, j;
604
605	if (hfi_parse_features())
606		return;
607
608	/* There is one HFI instance per die/package. */
609	max_hfi_instances = topology_max_packages() *
610			    topology_max_dies_per_package();
611
612	/*
613	 * This allocation may fail. CPU hotplug callbacks must check
614	 * for a null pointer.
615	 */
616	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
617				GFP_KERNEL);
618	if (!hfi_instances)
619		return;
620
621	for (i = 0; i < max_hfi_instances; i++) {
622		hfi_instance = &hfi_instances[i];
623		if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
624			goto err_nomem;
625	}
626
627	hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
628	if (!hfi_updates_wq)
629		goto err_nomem;
630
631	register_syscore_ops(&hfi_pm_ops);
632
633	return;
634
635err_nomem:
636	for (j = 0; j < i; ++j) {
637		hfi_instance = &hfi_instances[j];
638		free_cpumask_var(hfi_instance->cpus);
639	}
640
641	kfree(hfi_instances);
642	hfi_instances = NULL;
643}
644