// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
 *
 * Author: Steven Kinney <Steven.Kinney@amd.com>
 * Author: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
 *
 * Perf: amd_iommu - AMD IOMMU Performance Counter PMU implementation
 */

#define pr_fmt(fmt)	"perf/amd_iommu: " fmt

#include <linux/perf_event.h>
#include <linux/init.h>
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/amd-iommu.h>

#include "../perf_event.h"
#include "iommu.h"

/* iommu pmu conf masks */
#define GET_CSOURCE(x)     ((x)->conf & 0xFFULL)
#define GET_DEVID(x)       (((x)->conf >> 8)  & 0xFFFFULL)
#define GET_DOMID(x)       (((x)->conf >> 24) & 0xFFFFULL)
#define GET_PASID(x)       (((x)->conf >> 40) & 0xFFFFFULL)

/* iommu pmu conf1 masks */
#define GET_DEVID_MASK(x)  ((x)->conf1  & 0xFFFFULL)
#define GET_DOMID_MASK(x)  (((x)->conf1 >> 16) & 0xFFFFULL)
#define GET_PASID_MASK(x)  (((x)->conf1 >> 32) & 0xFFFFFULL)
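
/*
 * Field layout of event->attr.config/config1 as decoded by the macros
 * above (and mirrored by the sysfs format attributes below):
 *
 *   config:  [7:0] csource, [23:8] devid, [39:24] domid, [59:40] pasid
 *   config1: [15:0] devid_mask, [31:16] domid_mask, [51:32] pasid_mask
 */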

#define IOMMU_NAME_SIZE 16

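/*
 * Per-IOMMU PMU instance. cntr_assign_mask tracks which bank/counter
 * pairs are in use; see get_next_avail_iommu_bnk_cntr() for the bit
 * encoding.
 */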
struct perf_amd_iommu {
	struct list_head list;
	struct pmu pmu;
	struct amd_iommu *iommu;
	char name[IOMMU_NAME_SIZE];
	u8 max_banks;
	u8 max_counters;
	u64 cntr_assign_mask;
	raw_spinlock_t lock;
};

static LIST_HEAD(perf_amd_iommu_list);

/*---------------------------------------------
 * sysfs format attributes
 *---------------------------------------------*/
PMU_FORMAT_ATTR(csource,    "config:0-7");
PMU_FORMAT_ATTR(devid,      "config:8-23");
PMU_FORMAT_ATTR(domid,      "config:24-39");
PMU_FORMAT_ATTR(pasid,      "config:40-59");
PMU_FORMAT_ATTR(devid_mask, "config1:0-15");
PMU_FORMAT_ATTR(domid_mask, "config1:16-31");
PMU_FORMAT_ATTR(pasid_mask, "config1:32-51");
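
/*
 * Example usage from user space (a sketch; assumes at least one IOMMU
 * enumerated as amd_iommu_0 -- adjust the instance index to the system):
 *
 *   perf stat -e amd_iommu_0/mem_trans_total/ -a -- sleep 1
 *   perf stat -e amd_iommu_0/csource=0x05,devid=0x100,devid_mask=0xffff/ -a -- sleep 1
 */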

static struct attribute *iommu_format_attrs[] = {
	&format_attr_csource.attr,
	&format_attr_devid.attr,
	&format_attr_pasid.attr,
	&format_attr_domid.attr,
	&format_attr_devid_mask.attr,
	&format_attr_pasid_mask.attr,
	&format_attr_domid_mask.attr,
	NULL,
};

static struct attribute_group amd_iommu_format_group = {
	.name = "format",
	.attrs = iommu_format_attrs,
};

/*---------------------------------------------
 * sysfs events attributes
 *---------------------------------------------*/
static struct attribute_group amd_iommu_events_group = {
	.name = "events",
};

struct amd_iommu_event_desc {
	struct device_attribute attr;
	const char *event;
};

static ssize_t _iommu_event_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct amd_iommu_event_desc *event =
		container_of(attr, struct amd_iommu_event_desc, attr);
	return sprintf(buf, "%s\n", event->event);
}

#define AMD_IOMMU_EVENT_DESC(_name, _event)			\
{								\
	.attr  = __ATTR(_name, 0444, _iommu_event_show, NULL),	\
	.event = _event,					\
}

static struct amd_iommu_event_desc amd_iommu_v2_event_descs[] = {
	AMD_IOMMU_EVENT_DESC(mem_pass_untrans,        "csource=0x01"),
	AMD_IOMMU_EVENT_DESC(mem_pass_pretrans,       "csource=0x02"),
	AMD_IOMMU_EVENT_DESC(mem_pass_excl,           "csource=0x03"),
	AMD_IOMMU_EVENT_DESC(mem_target_abort,        "csource=0x04"),
	AMD_IOMMU_EVENT_DESC(mem_trans_total,         "csource=0x05"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_hit,   "csource=0x06"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pte_mis,   "csource=0x07"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_hit,   "csource=0x08"),
	AMD_IOMMU_EVENT_DESC(mem_iommu_tlb_pde_mis,   "csource=0x09"),
	AMD_IOMMU_EVENT_DESC(mem_dte_hit,             "csource=0x0a"),
	AMD_IOMMU_EVENT_DESC(mem_dte_mis,             "csource=0x0b"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_tot,       "csource=0x0c"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_nst,       "csource=0x0d"),
	AMD_IOMMU_EVENT_DESC(page_tbl_read_gst,       "csource=0x0e"),
	AMD_IOMMU_EVENT_DESC(int_dte_hit,             "csource=0x0f"),
	AMD_IOMMU_EVENT_DESC(int_dte_mis,             "csource=0x10"),
	AMD_IOMMU_EVENT_DESC(cmd_processed,           "csource=0x11"),
	AMD_IOMMU_EVENT_DESC(cmd_processed_inv,       "csource=0x12"),
	AMD_IOMMU_EVENT_DESC(tlb_inv,                 "csource=0x13"),
	AMD_IOMMU_EVENT_DESC(ign_rd_wr_mmio_1ff8h,    "csource=0x14"),
	AMD_IOMMU_EVENT_DESC(vapic_int_non_guest,     "csource=0x15"),
	AMD_IOMMU_EVENT_DESC(vapic_int_guest,         "csource=0x16"),
	AMD_IOMMU_EVENT_DESC(smi_recv,                "csource=0x17"),
	AMD_IOMMU_EVENT_DESC(smi_blk,                 "csource=0x18"),
	{ /* end: all zeroes */ },
};

/*---------------------------------------------
 * sysfs cpumask attributes
 *---------------------------------------------*/
static cpumask_t iommu_cpumask;

static ssize_t _iommu_cpumask_show(struct device *dev,
				   struct device_attribute *attr,
				   char *buf)
{
	return cpumap_print_to_pagebuf(true, buf, &iommu_cpumask);
}
static DEVICE_ATTR(cpumask, 0444, _iommu_cpumask_show, NULL);

static struct attribute *iommu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static struct attribute_group amd_iommu_cpumask_group = {
	.attrs = iommu_cpumask_attrs,
};

/*---------------------------------------------*/

static int get_next_avail_iommu_bnk_cntr(struct perf_event *event)
{
	struct perf_amd_iommu *piommu = container_of(event->pmu, struct perf_amd_iommu, pmu);
	int max_cntrs = piommu->max_counters;
	int max_banks = piommu->max_banks;
	u32 shift, bank, cntr;
	unsigned long flags;
	int retval;

	raw_spin_lock_irqsave(&piommu->lock, flags);

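	/*
	 * First-fit search. Each bank owns a 4-bit slice of
	 * cntr_assign_mask (bank + bank * 3 == bank * 4), which presumes
	 * at most four counters per bank.
	 */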
	for (bank = 0; bank < max_banks; bank++) {
		for (cntr = 0; cntr < max_cntrs; cntr++) {
			shift = bank + (bank * 3) + cntr;
			if (piommu->cntr_assign_mask & BIT_ULL(shift)) {
				continue;
			} else {
				piommu->cntr_assign_mask |= BIT_ULL(shift);
				event->hw.iommu_bank = bank;
				event->hw.iommu_cntr = cntr;
				retval = 0;
				goto out;
			}
		}
	}
	retval = -ENOSPC;
out:
	raw_spin_unlock_irqrestore(&piommu->lock, flags);
	return retval;
}

static int clear_avail_iommu_bnk_cntr(struct perf_amd_iommu *perf_iommu,
					u8 bank, u8 cntr)
{
	unsigned long flags;
	int max_banks, max_cntrs;
	int shift = 0;

	max_banks = perf_iommu->max_banks;
	max_cntrs = perf_iommu->max_counters;

	/* Valid bank/counter indices run from 0 to max - 1. */
	if ((bank >= max_banks) || (cntr >= max_cntrs))
		return -EINVAL;

	shift = bank + (bank * 3) + cntr;

	raw_spin_lock_irqsave(&perf_iommu->lock, flags);
	perf_iommu->cntr_assign_mask &= ~BIT_ULL(shift);
	raw_spin_unlock_irqrestore(&perf_iommu->lock, flags);

	return 0;
}

static int perf_iommu_event_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;

	/* check the event attr type against this PMU for enumeration */
	if (event->attr.type != event->pmu->type)
		return -ENOENT;

	/*
	 * IOMMU counters are shared across all cores. Therefore, the PMU
	 * does not support per-process mode, nor does it support event
	 * sampling mode.
	 */
	if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
		return -EINVAL;

	if (event->cpu < 0)
		return -EINVAL;

	/* update the hw_perf_event struct with the iommu config data */
	hwc->conf  = event->attr.config;
	hwc->conf1 = event->attr.config1;

	return 0;
}

static inline struct amd_iommu *perf_event_2_iommu(struct perf_event *ev)
{
	return (container_of(ev->pmu, struct perf_amd_iommu, pmu))->iommu;
}

static void perf_iommu_enable_event(struct perf_event *ev)
{
	struct amd_iommu *iommu = perf_event_2_iommu(ev);
	struct hw_perf_event *hwc = &ev->hw;
	u8 bank = hwc->iommu_bank;
	u8 cntr = hwc->iommu_cntr;
	u64 reg = 0ULL;

	reg = GET_CSOURCE(hwc);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_COUNTER_SRC_REG, &reg);

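	/*
	 * Each match register below takes the match value in its low bits
	 * and the corresponding mask in bits 63:32; bit 31 is set whenever
	 * a filter is requested (presumably the match-enable bit in the
	 * hardware).
	 */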
	reg = GET_DEVID_MASK(hwc);
	reg = GET_DEVID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DEVID_MATCH_REG, &reg);

	reg = GET_PASID_MASK(hwc);
	reg = GET_PASID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_PASID_MATCH_REG, &reg);

	reg = GET_DOMID_MASK(hwc);
	reg = GET_DOMID(hwc) | (reg << 32);
	if (reg)
		reg |= BIT(31);
	amd_iommu_pc_set_reg(iommu, bank, cntr, IOMMU_PC_DOMID_MATCH_REG, &reg);
}

static void perf_iommu_disable_event(struct perf_event *event)
{
	struct amd_iommu *iommu = perf_event_2_iommu(event);
	struct hw_perf_event *hwc = &event->hw;
	u64 reg = 0ULL;

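	/* Clearing the counter source (csource == 0) stops the counter. */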
	amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
			     IOMMU_PC_COUNTER_SRC_REG, &reg);
}

static void perf_iommu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	/*
	 * To account for power-gating, which prevents writes to the
	 * counter, we need to enable the counter before setting up the
	 * counter register.
	 */
	perf_iommu_enable_event(event);

	if (flags & PERF_EF_RELOAD) {
		u64 count = 0;
		struct amd_iommu *iommu = perf_event_2_iommu(event);

		/*
		 * Since the IOMMU PMU only supports counting mode, the
		 * counter always starts at zero.
		 */
		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				     IOMMU_PC_COUNTER_REG, &count);
	}

	perf_event_update_userpage(event);
}

static void perf_iommu_read(struct perf_event *event)
{
	u64 count;
	struct hw_perf_event *hwc = &event->hw;
	struct amd_iommu *iommu = perf_event_2_iommu(event);

	if (amd_iommu_pc_get_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				 IOMMU_PC_COUNTER_REG, &count))
		return;

	/* The IOMMU pc counter register is only 48 bits wide. */
	count &= GENMASK_ULL(47, 0);

	/*
	 * Since the counter always starts at zero, simply accumulate the
	 * count for the event.
	 */
	local64_add(count, &event->count);
}

static void perf_iommu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * To account for power-gating, in which reading the counter would
	 * return zero, we need to read the register before disabling.
	 */
	perf_iommu_read(event);
	hwc->state |= PERF_HES_UPTODATE;

	perf_iommu_disable_event(event);
	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
	hwc->state |= PERF_HES_STOPPED;
}

static int perf_iommu_add(struct perf_event *event, int flags)
{
	int retval;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	/* request an iommu bank/counter */
	retval = get_next_avail_iommu_bnk_cntr(event);
	if (retval)
		return retval;

	if (flags & PERF_EF_START)
		perf_iommu_start(event, PERF_EF_RELOAD);

	return 0;
}

static void perf_iommu_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_amd_iommu *perf_iommu =
			container_of(event->pmu, struct perf_amd_iommu, pmu);

	perf_iommu_stop(event, PERF_EF_UPDATE);

	/* clear the assigned iommu bank/counter */
	clear_avail_iommu_bnk_cntr(perf_iommu,
				   hwc->iommu_bank, hwc->iommu_cntr);

	perf_event_update_userpage(event);
}

static __init int _init_events_attrs(void)
{
	int i = 0, j;
	struct attribute **attrs;

	while (amd_iommu_v2_event_descs[i].attr.attr.name)
		i++;

	attrs = kcalloc(i + 1, sizeof(*attrs), GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	for (j = 0; j < i; j++)
		attrs[j] = &amd_iommu_v2_event_descs[j].attr.attr;

	amd_iommu_events_group.attrs = attrs;
	return 0;
}

static const struct attribute_group *amd_iommu_attr_groups[] = {
	&amd_iommu_format_group,
	&amd_iommu_cpumask_group,
	&amd_iommu_events_group,
	NULL,
};

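/*
 * Template PMU ops; copied into each perf_amd_iommu instance in
 * init_one_iommu() before registration.
 */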
static const struct pmu iommu_pmu __initconst = {
	.event_init	= perf_iommu_event_init,
	.add		= perf_iommu_add,
	.del		= perf_iommu_del,
	.start		= perf_iommu_start,
	.stop		= perf_iommu_stop,
	.read		= perf_iommu_read,
	.task_ctx_nr	= perf_invalid_context,
	.attr_groups	= amd_iommu_attr_groups,
	.capabilities	= PERF_PMU_CAP_NO_EXCLUDE,
};

static __init int init_one_iommu(unsigned int idx)
{
	struct perf_amd_iommu *perf_iommu;
	int ret;

	perf_iommu = kzalloc(sizeof(struct perf_amd_iommu), GFP_KERNEL);
	if (!perf_iommu)
		return -ENOMEM;

	raw_spin_lock_init(&perf_iommu->lock);

	perf_iommu->pmu          = iommu_pmu;
	perf_iommu->iommu        = get_amd_iommu(idx);
	perf_iommu->max_banks    = amd_iommu_pc_get_max_banks(idx);
	perf_iommu->max_counters = amd_iommu_pc_get_max_counters(idx);

	if (!perf_iommu->iommu ||
	    !perf_iommu->max_banks ||
	    !perf_iommu->max_counters) {
		kfree(perf_iommu);
		return -EINVAL;
	}

	snprintf(perf_iommu->name, IOMMU_NAME_SIZE, "amd_iommu_%u", idx);

	ret = perf_pmu_register(&perf_iommu->pmu, perf_iommu->name, -1);
	if (!ret) {
		pr_info("Detected AMD IOMMU #%d (%d banks, %d counters/bank).\n",
			idx, perf_iommu->max_banks, perf_iommu->max_counters);
		list_add_tail(&perf_iommu->list, &perf_amd_iommu_list);
	} else {
		pr_warn("Error initializing IOMMU %d.\n", idx);
		kfree(perf_iommu);
	}
	return ret;
}

static __init int amd_iommu_pc_init(void)
{
	unsigned int i, cnt = 0;
	int ret;

	/* Make sure the IOMMU PC resource is available */
	if (!amd_iommu_pc_supported())
		return -ENODEV;

	ret = _init_events_attrs();
	if (ret)
		return ret;

	/*
	 * An IOMMU PMU is specific to an IOMMU, and can function
	 * independently. So we go through all IOMMUs and ignore the ones
	 * that fail init, unless all IOMMUs fail.
	 */
	for (i = 0; i < amd_iommu_get_num_iommus(); i++) {
		ret = init_one_iommu(i);
		if (!ret)
			cnt++;
	}

	if (!cnt) {
		kfree(amd_iommu_events_group.attrs);
		return -ENODEV;
	}

	/* Init cpumask attributes to only core 0 */
	cpumask_set_cpu(0, &iommu_cpumask);
	return 0;
}

device_initcall(amd_iommu_pc_init);