1// SPDX-License-Identifier: GPL-2.0-only
2
3/*
4 * Copyright(c) 2023 Huawei
5 *
6 * The CXL 3.0 specification includes a standard Performance Monitoring Unit,
7 * called the CXL PMU, or CPMU. In order to allow a high degree of
8 * implementation flexibility the specification provides a wide range of
9 * options all of which are self describing.
10 *
11 * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface
12 */
13
14#include <linux/io-64-nonatomic-lo-hi.h>
15#include <linux/perf_event.h>
16#include <linux/bitops.h>
17#include <linux/device.h>
18#include <linux/bits.h>
19#include <linux/list.h>
20#include <linux/bug.h>
21#include <linux/pci.h>
22
23#include "../cxl/cxlpci.h"
24#include "../cxl/cxl.h"
25#include "../cxl/pmu.h"
26
/*
 * CPMU register layout. Register offsets are relative to the CPMU register
 * base (info->base); the indented defines under each register are the fields
 * within that register.
 */

/* Capability register: describes what this CPMU instance provides */
#define CXL_PMU_CAP_REG			0x0
/* Counter count and event capability count are stored minus one (parser adds 1) */
#define   CXL_PMU_CAP_NUM_COUNTERS_MSK			GENMASK_ULL(5, 0)
#define   CXL_PMU_CAP_COUNTER_WIDTH_MSK			GENMASK_ULL(15, 8)
#define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK		GENMASK_ULL(24, 20)
/* Bitmap of supported filters; the bits below are positions within this field */
#define   CXL_PMU_CAP_FILTERS_SUP_MSK			GENMASK_ULL(39, 32)
#define     CXL_PMU_FILTER_HDM				BIT(0)
#define     CXL_PMU_FILTER_CHAN_RANK_BANK		BIT(1)
#define   CXL_PMU_CAP_MSI_N_MSK				GENMASK_ULL(47, 44)
#define   CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN		BIT_ULL(48)
#define   CXL_PMU_CAP_FREEZE				BIT_ULL(49)
#define   CXL_PMU_CAP_INT				BIT_ULL(50)
#define   CXL_PMU_CAP_VERSION_MSK			GENMASK_ULL(63, 60)

/* Overflow status and freeze control: one bit per counter */
#define CXL_PMU_OVERFLOW_REG		0x10
#define CXL_PMU_FREEZE_REG		0x18
/* Event Capabilities registers, one 8 byte register per index n */
#define CXL_PMU_EVENT_CAP_REG(n)	(0x100 + 8 * (n))
#define   CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK	GENMASK_ULL(31, 0)
#define   CXL_PMU_EVENT_CAP_GROUP_ID_MSK		GENMASK_ULL(47, 32)
#define   CXL_PMU_EVENT_CAP_VENDOR_ID_MSK		GENMASK_ULL(63, 48)

/* Counter Configuration registers, one 8 byte register per counter n */
#define CXL_PMU_COUNTER_CFG_REG(n)	(0x200 + 8 * (n))
#define   CXL_PMU_COUNTER_CFG_TYPE_MSK			GENMASK_ULL(1, 0)
#define     CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN		0
#define     CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN		1
#define     CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE	2
#define   CXL_PMU_COUNTER_CFG_ENABLE			BIT_ULL(8)
#define   CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW		BIT_ULL(9)
#define   CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW		BIT_ULL(10)
#define   CXL_PMU_COUNTER_CFG_EDGE			BIT_ULL(11)
#define   CXL_PMU_COUNTER_CFG_INVERT			BIT_ULL(12)
#define   CXL_PMU_COUNTER_CFG_THRESHOLD_MSK		GENMASK_ULL(23, 16)
#define   CXL_PMU_COUNTER_CFG_EVENTS_MSK		GENMASK_ULL(55, 24)
/* Index of the Event Capabilities register the counter selects events from */
#define   CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK	GENMASK_ULL(63, 59)

/* Filter Configuration: 8 slots of 4 bytes for each counter n, slot f */
#define CXL_PMU_FILTER_CFG_REG(n, f)	(0x400 + 4 * ((f) + (n) * 8))
#define   CXL_PMU_FILTER_CFG_VALUE_MSK			GENMASK(31, 0)

/* Counter value registers, one 8 byte register per counter n */
#define CXL_PMU_COUNTER_REG(n)		(0xc00 + 8 * (n))
65
/*
 * CXL rev 3.0 Table 13-5 Events under CXL Vendor ID
 *
 * Group IDs used as the "gid" part of the spec-defined events listed in
 * cxl_pmu_event_attrs[].
 */
#define CXL_PMU_GID_CLOCK_TICKS		0x00
#define CXL_PMU_GID_D2H_REQ		0x0010
#define CXL_PMU_GID_D2H_RSP		0x0011
#define CXL_PMU_GID_H2D_REQ		0x0012
#define CXL_PMU_GID_H2D_RSP		0x0013
#define CXL_PMU_GID_CACHE_DATA		0x0014
#define CXL_PMU_GID_M2S_REQ		0x0020
#define CXL_PMU_GID_M2S_RWD		0x0021
#define CXL_PMU_GID_M2S_BIRSP		0x0022
#define CXL_PMU_GID_S2M_BISNP		0x0023
#define CXL_PMU_GID_S2M_NDR		0x0024
#define CXL_PMU_GID_S2M_DRS		0x0025
#define CXL_PMU_GID_DDR			0x8000
80
/* Dynamic CPU hotplug state obtained in cxl_pmu_init(); shared by all instances */
static int cxl_pmu_cpuhp_state_num;
82
/*
 * One set of events this CPMU can count. Entries live on either
 * cxl_pmu_info.event_caps_fixed (one per fixed function counter) or
 * cxl_pmu_info.event_caps_configurable (one per Event Capabilities register
 * not claimed by a fixed function counter).
 */
struct cxl_pmu_ev_cap {
	/* Vendor ID and Group ID read from the Event Capabilities register */
	u16 vid;
	u16 gid;
	/*
	 * Event bitmask: exactly what a fixed counter counts, or the supported
	 * events a configurable counter may select from.
	 */
	u32 msk;
	/* Which member is valid depends on the list the entry is on */
	union {
		int counter_idx; /* fixed counters */
		int event_idx; /* configurable counters */
	};
	struct list_head node;
};
93
/* Size of the per-counter bitmaps below */
#define CXL_PMU_MAX_COUNTERS 64
/* Driver state for one CPMU instance; recovered from struct pmu via container_of */
struct cxl_pmu_info {
	struct pmu pmu;
	/* MMIO base of the CPMU register block */
	void __iomem *base;
	/* Event currently occupying each counter, indexed by counter; NULL if free */
	struct perf_event **hw_events;
	/* Lists of struct cxl_pmu_ev_cap filled by cxl_pmu_parse_caps() */
	struct list_head event_caps_configurable;
	struct list_head event_caps_fixed;
	/* Counters currently allocated to an event (set in add, cleared in del) */
	DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS);
	/* Counters whose configuration register reports the configurable type */
	DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS);
	/* Values decoded from the capability register */
	u16 counter_width;
	u8 num_counters;
	u8 num_event_capabilities;
	/* CPU events are bound to and the IRQ is steered to; -1 when none chosen */
	int on_cpu;
	/* Instance node for the CPU hotplug multi-instance state */
	struct hlist_node node;
	/* True if the capability register advertises the HDM filter */
	bool filter_hdm;
	/*
	 * Interrupt message number from the capability register (-1 if the CPMU
	 * has no interrupt) until probe replaces it with the Linux IRQ number.
	 */
	int irq;
};
111
/* Map the struct pmu embedded in a struct cxl_pmu_info back to its container */
#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu)
113
114/*
115 * All CPMU counters are discoverable via the Event Capabilities Registers.
116 * Each Event Capability register contains a a VID / GroupID.
117 * A counter may then count any combination (by summing) of events in
118 * that group which are in the Supported Events Bitmask.
119 * However, there are some complexities to the scheme.
120 *  - Fixed function counters refer to an Event Capabilities register.
121 *    That event capability register is not then used for Configurable
122 *    counters.
123 */
124static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
125{
126	unsigned long fixed_counter_event_cap_bm = 0;
127	void __iomem *base = info->base;
128	bool freeze_for_enable;
129	u64 val, eval;
130	int i;
131
132	val = readq(base + CXL_PMU_CAP_REG);
133	freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) &&
134		FIELD_GET(CXL_PMU_CAP_FREEZE, val);
135	if (!freeze_for_enable) {
136		dev_err(dev, "Counters not writable while frozen\n");
137		return -ENODEV;
138	}
139
140	info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1;
141	info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val);
142	info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1;
143
144	info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM;
145	if (FIELD_GET(CXL_PMU_CAP_INT, val))
146		info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val);
147	else
148		info->irq = -1;
149
150	/* First handle fixed function counters; note if configurable counters found */
151	for (i = 0; i < info->num_counters; i++) {
152		struct cxl_pmu_ev_cap *pmu_ev;
153		u32 events_msk;
154		u8 group_idx;
155
156		val = readq(base + CXL_PMU_COUNTER_CFG_REG(i));
157
158		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) ==
159			CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) {
160			set_bit(i, info->conf_counter_bm);
161		}
162
163		if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) !=
164		    CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN)
165			continue;
166
167		/* In this case we know which fields are const */
168		group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val);
169		events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val);
170		eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx));
171		pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
172		if (!pmu_ev)
173			return -ENOMEM;
174
175		pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
176		pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
177		/* For a fixed purpose counter use the events mask from the counter CFG */
178		pmu_ev->msk = events_msk;
179		pmu_ev->counter_idx = i;
180		/* This list add is never unwound as all entries deleted on remove */
181		list_add(&pmu_ev->node, &info->event_caps_fixed);
182		/*
183		 * Configurable counters must not use an Event Capability registers that
184		 * is in use for a Fixed counter
185		 */
186		set_bit(group_idx, &fixed_counter_event_cap_bm);
187	}
188
189	if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) {
190		struct cxl_pmu_ev_cap *pmu_ev;
191		int j;
192		/* Walk event capabilities unused by fixed counters */
193		for_each_clear_bit(j, &fixed_counter_event_cap_bm,
194				   info->num_event_capabilities) {
195			pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
196			if (!pmu_ev)
197				return -ENOMEM;
198
199			eval = readq(base + CXL_PMU_EVENT_CAP_REG(j));
200			pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
201			pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
202			pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval);
203			pmu_ev->event_idx = j;
204			list_add(&pmu_ev->node, &info->event_caps_configurable);
205		}
206	}
207
208	return 0;
209}
210
211static ssize_t cxl_pmu_format_sysfs_show(struct device *dev,
212					 struct device_attribute *attr, char *buf)
213{
214	struct dev_ext_attribute *eattr;
215
216	eattr = container_of(attr, struct dev_ext_attribute, attr);
217
218	return sysfs_emit(buf, "%s\n", (char *)eattr->var);
219}
220
/*
 * Build an anonymous read-only (0444) dev_ext_attribute named _name whose
 * ->var points at the _format string, and evaluate to a pointer to its
 * embedded struct attribute so it can be placed directly in an attribute
 * array.
 */
#define CXL_PMU_FORMAT_ATTR(_name, _format)\
	(&((struct dev_ext_attribute[]) {					\
		{								\
			.attr = __ATTR(_name, 0444,				\
				       cxl_pmu_format_sysfs_show, NULL),	\
			.var = (void *)_format					\
		}								\
		})[0].attr.attr)
229
/* Indices into cxl_pmu_format_attr[], so individual entries can be looked up */
enum {
	cxl_pmu_mask_attr,
	cxl_pmu_gid_attr,
	cxl_pmu_vid_attr,
	cxl_pmu_threshold_attr,
	cxl_pmu_invert_attr,
	cxl_pmu_edge_attr,
	cxl_pmu_hdm_filter_en_attr,
	cxl_pmu_hdm_attr,
};

/*
 * Layout of perf_event_attr config/config1/config2 as exposed in the sysfs
 * "format" group. Must stay in step with the CXL_PMU_ATTR_CONFIG* masks.
 */
static struct attribute *cxl_pmu_format_attr[] = {
	[cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"),
	[cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"),
	[cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"),
	[cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"),
	[cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"),
	[cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"),
	[cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"),
	[cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"),
	NULL
};
252
/* Field masks for perf_event_attr config, config1 and config2 respectively */
#define CXL_PMU_ATTR_CONFIG_MASK_MSK		GENMASK_ULL(31, 0)
#define CXL_PMU_ATTR_CONFIG_GID_MSK		GENMASK_ULL(47, 32)
#define CXL_PMU_ATTR_CONFIG_VID_MSK		GENMASK_ULL(63, 48)
#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK	GENMASK_ULL(15, 0)
#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK		BIT(16)
#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK		BIT(17)
#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK	BIT(18)
#define CXL_PMU_ATTR_CONFIG2_HDM_MSK		GENMASK(15, 0)
261
262static umode_t cxl_pmu_format_is_visible(struct kobject *kobj,
263					 struct attribute *attr, int a)
264{
265	struct device *dev = kobj_to_dev(kobj);
266	struct cxl_pmu_info *info = dev_get_drvdata(dev);
267
268	/*
269	 * Filter capability at the CPMU level, so hide the attributes if the particular
270	 * filter is not supported.
271	 */
272	if (!info->filter_hdm &&
273	    (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] ||
274	     attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr]))
275		return 0;
276
277	return attr->mode;
278}
279
/* sysfs "format" directory describing how config/config1/config2 are laid out */
static const struct attribute_group cxl_pmu_format_group = {
	.name = "format",
	.attrs = cxl_pmu_format_attr,
	.is_visible = cxl_pmu_format_is_visible,
};
285
286static u32 cxl_pmu_config_get_mask(struct perf_event *event)
287{
288	return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config);
289}
290
291static u16 cxl_pmu_config_get_gid(struct perf_event *event)
292{
293	return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config);
294}
295
296static u16 cxl_pmu_config_get_vid(struct perf_event *event)
297{
298	return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config);
299}
300
301static u8 cxl_pmu_config1_get_threshold(struct perf_event *event)
302{
303	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1);
304}
305
306static bool cxl_pmu_config1_get_invert(struct perf_event *event)
307{
308	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1);
309}
310
311static bool cxl_pmu_config1_get_edge(struct perf_event *event)
312{
313	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1);
314}
315
316/*
317 * CPMU specification allows for 8 filters, each with a 32 bit value...
318 * So we need to find 8x32bits to store it in.
319 * As the value used for disable is 0xffff_ffff, a separate enable switch
320 * is needed.
321 */
322
323static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event)
324{
325	return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1);
326}
327
328static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event)
329{
330	return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2);
331}
332
333static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
334					struct device_attribute *attr, char *buf)
335{
336	struct perf_pmu_events_attr *pmu_attr =
337		container_of(attr, struct perf_pmu_events_attr, attr);
338
339	return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id);
340}
341
/*
 * Declare an event attribute whose id packs vid (bits 63:48), gid (bits
 * 47:32) and event mask (low bits) in the same layout as attr.config.
 */
#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk)			\
	PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show,		\
			  ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk))

/* For CXL spec defined events */
#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk)			\
	CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk)
349
/*
 * Named events defined by the CXL specification, exposed through the sysfs
 * "events" group. Each entry is a (group ID, event bit) pair under the CXL
 * vendor ID; entries this CPMU cannot count are hidden by
 * cxl_pmu_event_is_visible().
 */
static struct attribute *cxl_pmu_event_attrs[] = {
	CXL_PMU_EVENT_CXL_ATTR(clock_ticks,			CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
	/* CXL rev 3.0 Table 3-17 - Device to Host Requests */
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr,			CXL_PMU_GID_D2H_REQ, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown,			CXL_PMU_GID_D2H_REQ, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared,		CXL_PMU_GID_D2H_REQ, BIT(3)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany,			CXL_PMU_GID_D2H_REQ, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata,		CXL_PMU_GID_D2H_REQ, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr,			CXL_PMU_GID_D2H_REQ, BIT(6)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr,			CXL_PMU_GID_D2H_REQ, BIT(7)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush,			CXL_PMU_GID_D2H_REQ, BIT(8)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict,		CXL_PMU_GID_D2H_REQ, BIT(9)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict,		CXL_PMU_GID_D2H_REQ, BIT(10)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata,	CXL_PMU_GID_D2H_REQ, BIT(11)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv,			CXL_PMU_GID_D2H_REQ, BIT(12)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf,		CXL_PMU_GID_D2H_REQ, BIT(13)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv,			CXL_PMU_GID_D2H_REQ, BIT(14)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed,		CXL_PMU_GID_D2H_REQ, BIT(16)),
	/* CXL rev 3.0 Table 3-20 - D2H Response Encodings */
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti,		CXL_PMU_GID_D2H_RSP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv,		CXL_PMU_GID_D2H_RSP, BIT(6)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse,		CXL_PMU_GID_D2H_RSP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspshitse,		CXL_PMU_GID_D2H_RSP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspsfwdm,		CXL_PMU_GID_D2H_RSP, BIT(7)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspifwdm,		CXL_PMU_GID_D2H_RSP, BIT(15)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvfwdv,		CXL_PMU_GID_D2H_RSP, BIT(22)),
	/* CXL rev 3.0 Table 3-21 - CXL.cache - Mapping of H2D Requests to D2H Responses */
	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpdata,			CXL_PMU_GID_H2D_REQ, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpinv,			CXL_PMU_GID_H2D_REQ, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpcur,			CXL_PMU_GID_H2D_REQ, BIT(3)),
	/* CXL rev 3.0 Table 3-22 - H2D Response Opcode Encodings */
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_writepull,		CXL_PMU_GID_H2D_RSP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_go,			CXL_PMU_GID_H2D_RSP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepull,		CXL_PMU_GID_H2D_RSP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_extcmp,			CXL_PMU_GID_H2D_RSP, BIT(6)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepulldrop,		CXL_PMU_GID_H2D_RSP, BIT(8)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_fastgowritepull,		CXL_PMU_GID_H2D_RSP, BIT(13)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_goerrwritepull,		CXL_PMU_GID_H2D_RSP, BIT(15)),
	/* CXL rev 3.0 Table 13-5 directly lists these */
	CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data,		CXL_PMU_GID_CACHE_DATA, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data,		CXL_PMU_GID_CACHE_DATA, BIT(1)),
	/* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv,			CXL_PMU_GID_M2S_REQ, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd,			CXL_PMU_GID_M2S_REQ, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata,		CXL_PMU_GID_M2S_REQ, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd,		CXL_PMU_GID_M2S_REQ, BIT(3)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd,		CXL_PMU_GID_M2S_REQ, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd,		CXL_PMU_GID_M2S_REQ, BIT(8)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt,		CXL_PMU_GID_M2S_REQ, BIT(9)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict,		CXL_PMU_GID_M2S_REQ, BIT(10)),
	/* CXL rev 3.0 Table 3-35 M2S RwD Memory Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwr,			CXL_PMU_GID_M2S_RWD, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwrptl,		CXL_PMU_GID_M2S_RWD, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_biconflict,		CXL_PMU_GID_M2S_RWD, BIT(4)),
	/* CXL rev 3.0 Table 3-38 M2S BIRsp Memory Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_i,			CXL_PMU_GID_M2S_BIRSP, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_s,			CXL_PMU_GID_M2S_BIRSP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_e,			CXL_PMU_GID_M2S_BIRSP, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_iblk,			CXL_PMU_GID_M2S_BIRSP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_sblk,			CXL_PMU_GID_M2S_BIRSP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_eblk,			CXL_PMU_GID_M2S_BIRSP, BIT(6)),
	/* CXL rev 3.0 Table 3-40 S2M BISnp Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_cur,			CXL_PMU_GID_S2M_BISNP, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_data,			CXL_PMU_GID_S2M_BISNP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_inv,			CXL_PMU_GID_S2M_BISNP, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk,		CXL_PMU_GID_S2M_BISNP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk,		CXL_PMU_GID_S2M_BISNP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk,		CXL_PMU_GID_S2M_BISNP, BIT(6)),
	/* CXL rev 3.0 Table 3-43 S2M NDR Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp,			CXL_PMU_GID_S2M_NDR, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps,			CXL_PMU_GID_S2M_NDR, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe,			CXL_PMU_GID_S2M_NDR, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack,		CXL_PMU_GID_S2M_NDR, BIT(4)),
	/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata,			CXL_PMU_GID_S2M_DRS, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm,		CXL_PMU_GID_S2M_DRS, BIT(1)),
	/* CXL rev 3.0 Table 13-5 directly lists these */
	CXL_PMU_EVENT_CXL_ATTR(ddr_act,				CXL_PMU_GID_DDR, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_pre,				CXL_PMU_GID_DDR, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_casrd,			CXL_PMU_GID_DDR, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_caswr,			CXL_PMU_GID_DDR, BIT(3)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_refresh,			CXL_PMU_GID_DDR, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_selfrefreshent,		CXL_PMU_GID_DDR, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_rfm,				CXL_PMU_GID_DDR, BIT(6)),
	NULL
};
436
437static struct cxl_pmu_ev_cap *cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info *info,
438								int vid, int gid, int msk)
439{
440	struct cxl_pmu_ev_cap *pmu_ev;
441
442	list_for_each_entry(pmu_ev, &info->event_caps_fixed, node) {
443		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
444			continue;
445
446		/* Precise match for fixed counter */
447		if (msk == pmu_ev->msk)
448			return pmu_ev;
449	}
450
451	return ERR_PTR(-EINVAL);
452}
453
454static struct cxl_pmu_ev_cap *cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info *info,
455								 int vid, int gid, int msk)
456{
457	struct cxl_pmu_ev_cap *pmu_ev;
458
459	list_for_each_entry(pmu_ev, &info->event_caps_configurable, node) {
460		if (vid != pmu_ev->vid || gid != pmu_ev->gid)
461			continue;
462
463		/* Request mask must be subset of supported */
464		if (msk & ~pmu_ev->msk)
465			continue;
466
467		return pmu_ev;
468	}
469
470	return ERR_PTR(-EINVAL);
471}
472
473static umode_t cxl_pmu_event_is_visible(struct kobject *kobj, struct attribute *attr, int a)
474{
475	struct device_attribute *dev_attr = container_of(attr, struct device_attribute, attr);
476	struct perf_pmu_events_attr *pmu_attr =
477		container_of(dev_attr, struct perf_pmu_events_attr, attr);
478	struct device *dev = kobj_to_dev(kobj);
479	struct cxl_pmu_info *info = dev_get_drvdata(dev);
480	int vid = FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, pmu_attr->id);
481	int gid = FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, pmu_attr->id);
482	int msk = FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, pmu_attr->id);
483
484	if (!IS_ERR(cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, msk)))
485		return attr->mode;
486
487	if (!IS_ERR(cxl_pmu_find_config_counter_ev_cap(info, vid, gid, msk)))
488		return attr->mode;
489
490	return 0;
491}
492
/* sysfs "events" directory listing the named events this CPMU can count */
static const struct attribute_group cxl_pmu_events = {
	.name = "events",
	.attrs = cxl_pmu_event_attrs,
	.is_visible = cxl_pmu_event_is_visible,
};
498
499static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
500			    char *buf)
501{
502	struct cxl_pmu_info *info = dev_get_drvdata(dev);
503
504	return cpumap_print_to_pagebuf(true, buf, cpumask_of(info->on_cpu));
505}
506static DEVICE_ATTR_RO(cpumask);
507
/* Unnamed group: the cpumask attribute sits directly in the PMU device directory */
static struct attribute *cxl_pmu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

static const struct attribute_group cxl_pmu_cpumask_group = {
	.attrs = cxl_pmu_cpumask_attrs,
};

/* All sysfs groups handed to the perf core via struct pmu .attr_groups */
static const struct attribute_group *cxl_pmu_attr_groups[] = {
	&cxl_pmu_events,
	&cxl_pmu_format_group,
	&cxl_pmu_cpumask_group,
	NULL
};
523
524/* If counter_idx == NULL, don't try to allocate a counter. */
525static int cxl_pmu_get_event_idx(struct perf_event *event, int *counter_idx,
526				 int *event_idx)
527{
528	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
529	DECLARE_BITMAP(configurable_and_free, CXL_PMU_MAX_COUNTERS);
530	struct cxl_pmu_ev_cap *pmu_ev;
531	u32 mask;
532	u16 gid, vid;
533	int i;
534
535	vid = cxl_pmu_config_get_vid(event);
536	gid = cxl_pmu_config_get_gid(event);
537	mask = cxl_pmu_config_get_mask(event);
538
539	pmu_ev = cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, mask);
540	if (!IS_ERR(pmu_ev)) {
541		if (!counter_idx)
542			return 0;
543		if (!test_bit(pmu_ev->counter_idx, info->used_counter_bm)) {
544			*counter_idx = pmu_ev->counter_idx;
545			return 0;
546		}
547		/* Fixed counter is in use, but maybe a configurable one? */
548	}
549
550	pmu_ev = cxl_pmu_find_config_counter_ev_cap(info, vid, gid, mask);
551	if (!IS_ERR(pmu_ev)) {
552		if (!counter_idx)
553			return 0;
554
555		bitmap_andnot(configurable_and_free, info->conf_counter_bm,
556			info->used_counter_bm, CXL_PMU_MAX_COUNTERS);
557
558		i = find_first_bit(configurable_and_free, CXL_PMU_MAX_COUNTERS);
559		if (i == CXL_PMU_MAX_COUNTERS)
560			return -EINVAL;
561
562		*counter_idx = i;
563		return 0;
564	}
565
566	return -EINVAL;
567}
568
569static int cxl_pmu_event_init(struct perf_event *event)
570{
571	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
572	int rc;
573
574	/* Top level type sanity check - is this a Hardware Event being requested */
575	if (event->attr.type != event->pmu->type)
576		return -ENOENT;
577
578	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
579		return -EOPNOTSUPP;
580	/* TODO: Validation of any filter */
581
582	/*
583	 * Verify that it is possible to count what was requested. Either must
584	 * be a fixed counter that is a precise match or a configurable counter
585	 * where this is a subset.
586	 */
587	rc = cxl_pmu_get_event_idx(event, NULL, NULL);
588	if (rc < 0)
589		return rc;
590
591	event->cpu = info->on_cpu;
592
593	return 0;
594}
595
596static void cxl_pmu_enable(struct pmu *pmu)
597{
598	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
599	void __iomem *base = info->base;
600
601	/* Can assume frozen at this stage */
602	writeq(0, base + CXL_PMU_FREEZE_REG);
603}
604
605static void cxl_pmu_disable(struct pmu *pmu)
606{
607	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
608	void __iomem *base = info->base;
609
610	/*
611	 * Whilst bits above number of counters are RsvdZ
612	 * they are unlikely to be repurposed given
613	 * number of counters is allowed to be 64 leaving
614	 * no reserved bits.  Hence this is only slightly
615	 * naughty.
616	 */
617	writeq(GENMASK_ULL(63, 0), base + CXL_PMU_FREEZE_REG);
618}
619
620static void cxl_pmu_event_start(struct perf_event *event, int flags)
621{
622	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
623	struct hw_perf_event *hwc = &event->hw;
624	void __iomem *base = info->base;
625	u64 cfg;
626
627	/*
628	 * All paths to here should either set these flags directly or
629	 * call cxl_pmu_event_stop() which will ensure the correct state.
630	 */
631	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
632		return;
633
634	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
635	hwc->state = 0;
636
637	/*
638	 * Currently only hdm filter control is implemnted, this code will
639	 * want generalizing when more filters are added.
640	 */
641	if (info->filter_hdm) {
642		if (cxl_pmu_config1_hdm_filter_en(event))
643			cfg = cxl_pmu_config2_get_hdm_decoder(event);
644		else
645			cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */
646		writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
647	}
648
649	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
650	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1);
651	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW, 1);
652	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1);
653	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EDGE,
654			  cxl_pmu_config1_get_edge(event) ? 1 : 0);
655	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INVERT,
656			  cxl_pmu_config1_get_invert(event) ? 1 : 0);
657
658	/* Fixed purpose counters have next two fields RO */
659	if (test_bit(hwc->idx, info->conf_counter_bm)) {
660		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK,
661				  hwc->event_base);
662		cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENTS_MSK,
663				  cxl_pmu_config_get_mask(event));
664	}
665	cfg &= ~CXL_PMU_COUNTER_CFG_THRESHOLD_MSK;
666	/*
667	 * For events that generate only 1 count per clock the CXL 3.0 spec
668	 * states the threshold shall be set to 1 but if set to 0 it will
669	 * count the raw value anwyay?
670	 * There is no definition of what events will count multiple per cycle
671	 * and hence to which non 1 values of threshold can apply.
672	 * (CXL 3.0 8.2.7.2.1 Counter Configuration - threshold field definition)
673	 */
674	cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_THRESHOLD_MSK,
675			  cxl_pmu_config1_get_threshold(event));
676	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
677
678	local64_set(&hwc->prev_count, 0);
679	writeq(0, base + CXL_PMU_COUNTER_REG(hwc->idx));
680
681	perf_event_update_userpage(event);
682}
683
684static u64 cxl_pmu_read_counter(struct perf_event *event)
685{
686	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
687	void __iomem *base = info->base;
688
689	return readq(base + CXL_PMU_COUNTER_REG(event->hw.idx));
690}
691
692static void __cxl_pmu_read(struct perf_event *event, bool overflow)
693{
694	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
695	struct hw_perf_event *hwc = &event->hw;
696	u64 new_cnt, prev_cnt, delta;
697
698	do {
699		prev_cnt = local64_read(&hwc->prev_count);
700		new_cnt = cxl_pmu_read_counter(event);
701	} while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != prev_cnt);
702
703	/*
704	 * If we know an overflow occur then take that into account.
705	 * Note counter is not reset as that would lose events
706	 */
707	delta = (new_cnt - prev_cnt) & GENMASK_ULL(info->counter_width - 1, 0);
708	if (overflow && delta < GENMASK_ULL(info->counter_width - 1, 0))
709		delta += (1UL << info->counter_width);
710
711	local64_add(delta, &event->count);
712}
713
/* perf read callback: update event->count from the hardware counter */
static void cxl_pmu_read(struct perf_event *event)
{
	/* overflow=false: only the interrupt handler passes true */
	__cxl_pmu_read(event, false);
}
718
719static void cxl_pmu_event_stop(struct perf_event *event, int flags)
720{
721	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
722	void __iomem *base = info->base;
723	struct hw_perf_event *hwc = &event->hw;
724	u64 cfg;
725
726	cxl_pmu_read(event);
727	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
728	hwc->state |= PERF_HES_STOPPED;
729
730	cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
731	cfg &= ~(FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1) |
732		 FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1));
733	writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
734
735	hwc->state |= PERF_HES_UPTODATE;
736}
737
738static int cxl_pmu_event_add(struct perf_event *event, int flags)
739{
740	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
741	struct hw_perf_event *hwc = &event->hw;
742	int idx, rc;
743	int event_idx = 0;
744
745	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
746
747	rc = cxl_pmu_get_event_idx(event, &idx, &event_idx);
748	if (rc < 0)
749		return rc;
750
751	hwc->idx = idx;
752
753	/* Only set for configurable counters */
754	hwc->event_base = event_idx;
755	info->hw_events[idx] = event;
756	set_bit(idx, info->used_counter_bm);
757
758	if (flags & PERF_EF_START)
759		cxl_pmu_event_start(event, PERF_EF_RELOAD);
760
761	return 0;
762}
763
764static void cxl_pmu_event_del(struct perf_event *event, int flags)
765{
766	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
767	struct hw_perf_event *hwc = &event->hw;
768
769	cxl_pmu_event_stop(event, PERF_EF_UPDATE);
770	clear_bit(hwc->idx, info->used_counter_bm);
771	info->hw_events[hwc->idx] = NULL;
772	perf_event_update_userpage(event);
773}
774
775static irqreturn_t cxl_pmu_irq(int irq, void *data)
776{
777	struct cxl_pmu_info *info = data;
778	void __iomem *base = info->base;
779	u64 overflowed;
780	DECLARE_BITMAP(overflowedbm, 64);
781	int i;
782
783	overflowed = readq(base + CXL_PMU_OVERFLOW_REG);
784
785	/* Interrupt may be shared, so maybe it isn't ours */
786	if (!overflowed)
787		return IRQ_NONE;
788
789	bitmap_from_arr64(overflowedbm, &overflowed, 64);
790	for_each_set_bit(i, overflowedbm, info->num_counters) {
791		struct perf_event *event = info->hw_events[i];
792
793		if (!event) {
794			dev_dbg(info->pmu.dev,
795				"overflow but on non enabled counter %d\n", i);
796			continue;
797		}
798
799		__cxl_pmu_read(event, true);
800	}
801
802	writeq(overflowed, base + CXL_PMU_OVERFLOW_REG);
803
804	return IRQ_HANDLED;
805}
806
807static void cxl_pmu_perf_unregister(void *_info)
808{
809	struct cxl_pmu_info *info = _info;
810
811	perf_pmu_unregister(&info->pmu);
812}
813
814static void cxl_pmu_cpuhp_remove(void *_info)
815{
816	struct cxl_pmu_info *info = _info;
817
818	cpuhp_state_remove_instance_nocalls(cxl_pmu_cpuhp_state_num, &info->node);
819}
820
821static int cxl_pmu_probe(struct device *dev)
822{
823	struct cxl_pmu *pmu = to_cxl_pmu(dev);
824	struct pci_dev *pdev = to_pci_dev(dev->parent);
825	struct cxl_pmu_info *info;
826	char *irq_name;
827	char *dev_name;
828	int rc, irq;
829
830	info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
831	if (!info)
832		return -ENOMEM;
833
834	dev_set_drvdata(dev, info);
835	INIT_LIST_HEAD(&info->event_caps_fixed);
836	INIT_LIST_HEAD(&info->event_caps_configurable);
837
838	info->base = pmu->base;
839
840	info->on_cpu = -1;
841	rc = cxl_pmu_parse_caps(dev, info);
842	if (rc)
843		return rc;
844
845	info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events),
846				       info->num_counters, GFP_KERNEL);
847	if (!info->hw_events)
848		return -ENOMEM;
849
850	switch (pmu->type) {
851	case CXL_PMU_MEMDEV:
852		dev_name = devm_kasprintf(dev, GFP_KERNEL, "cxl_pmu_mem%d.%d",
853					  pmu->assoc_id, pmu->index);
854		break;
855	}
856	if (!dev_name)
857		return -ENOMEM;
858
859	info->pmu = (struct pmu) {
860		.name = dev_name,
861		.parent = dev,
862		.module = THIS_MODULE,
863		.event_init = cxl_pmu_event_init,
864		.pmu_enable = cxl_pmu_enable,
865		.pmu_disable = cxl_pmu_disable,
866		.add = cxl_pmu_event_add,
867		.del = cxl_pmu_event_del,
868		.start = cxl_pmu_event_start,
869		.stop = cxl_pmu_event_stop,
870		.read = cxl_pmu_read,
871		.task_ctx_nr = perf_invalid_context,
872		.attr_groups = cxl_pmu_attr_groups,
873		.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
874	};
875
876	if (info->irq <= 0)
877		return -EINVAL;
878
879	rc = pci_irq_vector(pdev, info->irq);
880	if (rc < 0)
881		return rc;
882	irq = rc;
883
884	irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name);
885	if (!irq_name)
886		return -ENOMEM;
887
888	rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT,
889			      irq_name, info);
890	if (rc)
891		return rc;
892	info->irq = irq;
893
894	rc = cpuhp_state_add_instance(cxl_pmu_cpuhp_state_num, &info->node);
895	if (rc)
896		return rc;
897
898	rc = devm_add_action_or_reset(dev, cxl_pmu_cpuhp_remove, info);
899	if (rc)
900		return rc;
901
902	rc = perf_pmu_register(&info->pmu, info->pmu.name, -1);
903	if (rc)
904		return rc;
905
906	rc = devm_add_action_or_reset(dev, cxl_pmu_perf_unregister, info);
907	if (rc)
908		return rc;
909
910	return 0;
911}
912
/* CXL bus driver binding to CXL_DEVICE_PMU devices; teardown is devm based */
static struct cxl_driver cxl_pmu_driver = {
	.name = "cxl_pmu",
	.probe = cxl_pmu_probe,
	.id = CXL_DEVICE_PMU,
};
918
919static int cxl_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
920{
921	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
922
923	if (info->on_cpu != -1)
924		return 0;
925
926	info->on_cpu = cpu;
927	/*
928	 * CPU HP lock is held so we should be guaranteed that the CPU hasn't yet
929	 * gone away again.
930	 */
931	WARN_ON(irq_set_affinity(info->irq, cpumask_of(cpu)));
932
933	return 0;
934}
935
936static int cxl_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
937{
938	struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
939	unsigned int target;
940
941	if (info->on_cpu != cpu)
942		return 0;
943
944	info->on_cpu = -1;
945	target = cpumask_any_but(cpu_online_mask, cpu);
946	if (target >= nr_cpu_ids) {
947		dev_err(info->pmu.dev, "Unable to find a suitable CPU\n");
948		return 0;
949	}
950
951	perf_pmu_migrate_context(&info->pmu, cpu, target);
952	info->on_cpu = target;
953	/*
954	 * CPU HP lock is held so we should be guaranteed that this CPU hasn't yet
955	 * gone away.
956	 */
957	WARN_ON(irq_set_affinity(info->irq, cpumask_of(target)));
958
959	return 0;
960}
961
962static __init int cxl_pmu_init(void)
963{
964	int rc;
965
966	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
967				     "AP_PERF_CXL_PMU_ONLINE",
968				     cxl_pmu_online_cpu, cxl_pmu_offline_cpu);
969	if (rc < 0)
970		return rc;
971	cxl_pmu_cpuhp_state_num = rc;
972
973	rc = cxl_driver_register(&cxl_pmu_driver);
974	if (rc)
975		cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
976
977	return rc;
978}
979
/* Module exit: undo cxl_pmu_init() in reverse order */
static __exit void cxl_pmu_exit(void)
{
	/* Unregister the driver first, before the hotplug state it uses goes away */
	cxl_driver_unregister(&cxl_pmu_driver);
	cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
}
985
/* Module metadata: licence, CXL symbol namespace, entry points and device alias */
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS(CXL);
module_init(cxl_pmu_init);
module_exit(cxl_pmu_exit);
MODULE_ALIAS_CXL(CXL_DEVICE_PMU);
991