1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * Xtensa Performance Monitor Module driver
4 * See Tensilica Debug User's Guide for PMU registers documentation.
5 *
6 * Copyright (C) 2015 Cadence Design Systems Inc.
7 */
8
9#include <linux/interrupt.h>
10#include <linux/irqdomain.h>
11#include <linux/module.h>
12#include <linux/of.h>
13#include <linux/perf_event.h>
14#include <linux/platform_device.h>
15
16#include <asm/core.h>
17#include <asm/processor.h>
18#include <asm/stacktrace.h>
19
20#define XTENSA_HWVERSION_RG_2015_0	260000
21
22#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
23#define XTENSA_PMU_ERI_BASE		0x00101000
24#else
25#define XTENSA_PMU_ERI_BASE		0x00001000
26#endif
27
28/* Global control/status for all perf counters */
29#define XTENSA_PMU_PMG			XTENSA_PMU_ERI_BASE
30/* Perf counter values */
31#define XTENSA_PMU_PM(i)		(XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
32/* Perf counter control registers */
33#define XTENSA_PMU_PMCTRL(i)		(XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
34/* Perf counter status registers */
35#define XTENSA_PMU_PMSTAT(i)		(XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)
36
37#define XTENSA_PMU_PMG_PMEN		0x1
38
39#define XTENSA_PMU_COUNTER_MASK		0xffffffffULL
40#define XTENSA_PMU_COUNTER_MAX		0x7fffffff
41
42#define XTENSA_PMU_PMCTRL_INTEN		0x00000001
43#define XTENSA_PMU_PMCTRL_KRNLCNT	0x00000008
44#define XTENSA_PMU_PMCTRL_TRACELEVEL	0x000000f0
45#define XTENSA_PMU_PMCTRL_SELECT_SHIFT	8
46#define XTENSA_PMU_PMCTRL_SELECT	0x00001f00
47#define XTENSA_PMU_PMCTRL_MASK_SHIFT	16
48#define XTENSA_PMU_PMCTRL_MASK		0xffff0000
49
50#define XTENSA_PMU_MASK(select, mask) \
51	(((select) << XTENSA_PMU_PMCTRL_SELECT_SHIFT) | \
52	 ((mask) << XTENSA_PMU_PMCTRL_MASK_SHIFT) | \
53	 XTENSA_PMU_PMCTRL_TRACELEVEL | \
54	 XTENSA_PMU_PMCTRL_INTEN)
55
56#define XTENSA_PMU_PMSTAT_OVFL		0x00000001
57#define XTENSA_PMU_PMSTAT_INTASRT	0x00000010
58
/* Per-CPU bookkeeping of events scheduled onto the hardware counters. */
struct xtensa_pmu_events {
	/* Array of events currently on this core */
	struct perf_event *event[XCHAL_NUM_PERF_COUNTERS];
	/* Bitmap of used hardware counters */
	unsigned long used_mask[BITS_TO_LONGS(XCHAL_NUM_PERF_COUNTERS)];
};
static DEFINE_PER_CPU(struct xtensa_pmu_events, xtensa_pmu_events);
66
/*
 * PMCTRL select/mask encodings for the generic PERF_COUNT_HW_* events.
 * A zero entry means the event is not supported and is rejected in
 * xtensa_pmu_event_init().
 */
static const u32 xtensa_hw_ctl[] = {
	[PERF_COUNT_HW_CPU_CYCLES]		= XTENSA_PMU_MASK(0, 0x1),
	[PERF_COUNT_HW_INSTRUCTIONS]		= XTENSA_PMU_MASK(2, 0xffff),
	[PERF_COUNT_HW_CACHE_REFERENCES]	= XTENSA_PMU_MASK(10, 0x1),
	[PERF_COUNT_HW_CACHE_MISSES]		= XTENSA_PMU_MASK(12, 0x1),
	/* Taken and non-taken branches + taken loop ends */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= XTENSA_PMU_MASK(2, 0x490),
	/* Instruction-related + other global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND]	= XTENSA_PMU_MASK(4, 0x1ff),
	/* Data-related global stall cycles */
	[PERF_COUNT_HW_STALLED_CYCLES_BACKEND]	= XTENSA_PMU_MASK(3, 0x1ff),
};
79
80#define C(_x) PERF_COUNT_HW_CACHE_##_x
81
/*
 * PMCTRL encodings for PERF_TYPE_HW_CACHE events, indexed by
 * [cache][operation][result].  Combinations without an initializer
 * stay zero and are treated as unsupported by xtensa_pmu_cache_event().
 */
static const u32 xtensa_cache_ctl[][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(10, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(10, 0x2),
		},
		[C(OP_WRITE)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(11, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(11, 0x2),
		},
	},
	[C(L1I)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(8, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(8, 0x2),
		},
	},
	[C(DTLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(9, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(9, 0x8),
		},
	},
	[C(ITLB)] = {
		[C(OP_READ)] = {
			[C(RESULT_ACCESS)]	= XTENSA_PMU_MASK(7, 0x1),
			[C(RESULT_MISS)]	= XTENSA_PMU_MASK(7, 0x8),
		},
	},
};
112
113static int xtensa_pmu_cache_event(u64 config)
114{
115	unsigned int cache_type, cache_op, cache_result;
116	int ret;
117
118	cache_type = (config >>  0) & 0xff;
119	cache_op = (config >>  8) & 0xff;
120	cache_result = (config >> 16) & 0xff;
121
122	if (cache_type >= ARRAY_SIZE(xtensa_cache_ctl) ||
123	    cache_op >= C(OP_MAX) ||
124	    cache_result >= C(RESULT_MAX))
125		return -EINVAL;
126
127	ret = xtensa_cache_ctl[cache_type][cache_op][cache_result];
128
129	if (ret == 0)
130		return -EINVAL;
131
132	return ret;
133}
134
/* Read the current value of hardware perf counter @idx via the ERI. */
static inline uint32_t xtensa_pmu_read_counter(int idx)
{
	return get_er(XTENSA_PMU_PM(idx));
}
139
/* Write value @v to hardware perf counter @idx via the ERI. */
static inline void xtensa_pmu_write_counter(int idx, uint32_t v)
{
	set_er(v, XTENSA_PMU_PM(idx));
}
144
145static void xtensa_perf_event_update(struct perf_event *event,
146				     struct hw_perf_event *hwc, int idx)
147{
148	uint64_t prev_raw_count, new_raw_count;
149	int64_t delta;
150
151	do {
152		prev_raw_count = local64_read(&hwc->prev_count);
153		new_raw_count = xtensa_pmu_read_counter(event->hw.idx);
154	} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
155				 new_raw_count) != prev_raw_count);
156
157	delta = (new_raw_count - prev_raw_count) & XTENSA_PMU_COUNTER_MASK;
158
159	local64_add(delta, &event->count);
160	local64_sub(delta, &hwc->period_left);
161}
162
/*
 * Program counter @idx for the next period.  Non-sampling events
 * always get a maximum-length period; sampling events reload from
 * period_left.  Returns true when a full period has elapsed, i.e.
 * the caller should emit a sample.
 */
static bool xtensa_perf_event_set_period(struct perf_event *event,
					 struct hw_perf_event *hwc, int idx)
{
	bool rc = false;
	s64 left;

	if (!is_sampling_event(event)) {
		/* Counting-only event: just maximize time to overflow. */
		left = XTENSA_PMU_COUNTER_MAX;
	} else {
		s64 period = hwc->sample_period;

		left = local64_read(&hwc->period_left);
		if (left <= -period) {
			/* Overrun by more than a period: restart fresh. */
			left = period;
			local64_set(&hwc->period_left, left);
			hwc->last_period = period;
			rc = true;
		} else if (left <= 0) {
			/* Period elapsed: advance to the next one. */
			left += period;
			local64_set(&hwc->period_left, left);
			hwc->last_period = period;
			rc = true;
		}
		/* Clamp to what the 32-bit counter can represent. */
		if (left > XTENSA_PMU_COUNTER_MAX)
			left = XTENSA_PMU_COUNTER_MAX;
	}

	/* Counter is written as -left so it overflows after 'left' events. */
	local64_set(&hwc->prev_count, -left);
	xtensa_pmu_write_counter(idx, -left);
	perf_event_update_userpage(event);

	return rc;
}
196
/* Globally enable all counters by setting the PMG enable bit. */
static void xtensa_pmu_enable(struct pmu *pmu)
{
	set_er(get_er(XTENSA_PMU_PMG) | XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
}
201
/* Globally disable all counters by clearing the PMG enable bit. */
static void xtensa_pmu_disable(struct pmu *pmu)
{
	set_er(get_er(XTENSA_PMU_PMG) & ~XTENSA_PMU_PMG_PMEN, XTENSA_PMU_PMG);
}
206
/*
 * ->event_init() callback: translate the generic event attributes into
 * a PMCTRL value stored in hw.config.  Returns 0 on success, -EINVAL
 * for unsupported configs of a known type, and -ENOENT for event
 * types this PMU does not handle.
 */
static int xtensa_pmu_event_init(struct perf_event *event)
{
	int ret;

	switch (event->attr.type) {
	case PERF_TYPE_HARDWARE:
		/* Zero table entries mark unsupported generic events. */
		if (event->attr.config >= ARRAY_SIZE(xtensa_hw_ctl) ||
		    xtensa_hw_ctl[event->attr.config] == 0)
			return -EINVAL;
		event->hw.config = xtensa_hw_ctl[event->attr.config];
		return 0;

	case PERF_TYPE_HW_CACHE:
		ret = xtensa_pmu_cache_event(event->attr.config);
		if (ret < 0)
			return ret;
		event->hw.config = ret;
		return 0;

	case PERF_TYPE_RAW:
		/* Not 'previous counter' select */
		if ((event->attr.config & XTENSA_PMU_PMCTRL_SELECT) ==
		    (1 << XTENSA_PMU_PMCTRL_SELECT_SHIFT))
			return -EINVAL;
		/*
		 * Keep only the user-controllable PMCTRL fields and
		 * always enable the overflow interrupt.
		 */
		event->hw.config = (event->attr.config &
				    (XTENSA_PMU_PMCTRL_KRNLCNT |
				     XTENSA_PMU_PMCTRL_TRACELEVEL |
				     XTENSA_PMU_PMCTRL_SELECT |
				     XTENSA_PMU_PMCTRL_MASK)) |
			XTENSA_PMU_PMCTRL_INTEN;
		return 0;

	default:
		return -ENOENT;
	}
}
243
244/*
245 * Starts/Stops a counter present on the PMU. The PMI handler
246 * should stop the counter when perf_event_overflow() returns
247 * !0. ->start() will be used to continue.
248 */
static void xtensa_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/* ->add() must have assigned a counter before ->start(). */
	if (WARN_ON_ONCE(idx == -1))
		return;

	if (flags & PERF_EF_RELOAD) {
		/* Only an up-to-date stopped event may be reloaded. */
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
		xtensa_perf_event_set_period(event, hwc, idx);
	}

	hwc->state = 0;

	/* Program the control register; counting resumes from here. */
	set_er(hwc->config, XTENSA_PMU_PMCTRL(idx));
}
266
static void xtensa_pmu_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		/* Disable the counter and write back (ack) its status. */
		set_er(0, XTENSA_PMU_PMCTRL(idx));
		set_er(get_er(XTENSA_PMU_PMSTAT(idx)),
		       XTENSA_PMU_PMSTAT(idx));
		hwc->state |= PERF_HES_STOPPED;
	}

	if ((flags & PERF_EF_UPDATE) &&
	    !(event->hw.state & PERF_HES_UPTODATE)) {
		/* Fold the final hardware count into the event. */
		xtensa_perf_event_update(event, &event->hw, idx);
		event->hw.state |= PERF_HES_UPTODATE;
	}
}
285
286/*
287 * Adds/Removes a counter to/from the PMU, can be done inside
288 * a transaction, see the ->*_txn() methods.
289 */
static int xtensa_pmu_add(struct perf_event *event, int flags)
{
	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
	struct hw_perf_event *hwc = &event->hw;
	int idx = hwc->idx;

	/* Prefer the previously used counter; else take any free one. */
	if (__test_and_set_bit(idx, ev->used_mask)) {
		idx = find_first_zero_bit(ev->used_mask,
					  XCHAL_NUM_PERF_COUNTERS);
		if (idx == XCHAL_NUM_PERF_COUNTERS)
			return -EAGAIN;

		__set_bit(idx, ev->used_mask);
		hwc->idx = idx;
	}
	ev->event[idx] = event;

	/* Counter is stopped until an explicit ->start(). */
	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	if (flags & PERF_EF_START)
		xtensa_pmu_start(event, PERF_EF_RELOAD);

	perf_event_update_userpage(event);
	return 0;
}
315
316static void xtensa_pmu_del(struct perf_event *event, int flags)
317{
318	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
319
320	xtensa_pmu_stop(event, PERF_EF_UPDATE);
321	__clear_bit(event->hw.idx, ev->used_mask);
322	perf_event_update_userpage(event);
323}
324
/* ->read() callback: sync the hardware counter into event->count. */
static void xtensa_pmu_read(struct perf_event *event)
{
	xtensa_perf_event_update(event, &event->hw, event->hw.idx);
}
329
330static int callchain_trace(struct stackframe *frame, void *data)
331{
332	struct perf_callchain_entry_ctx *entry = data;
333
334	perf_callchain_store(entry, frame->pc);
335	return 0;
336}
337
/* Collect a kernel-mode callchain by walking the kernel stack. */
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
			   struct pt_regs *regs)
{
	xtensa_backtrace_kernel(regs, entry->max_stack,
				callchain_trace, NULL, entry);
}
344
/* Collect a user-mode callchain by walking the user stack. */
void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
			 struct pt_regs *regs)
{
	xtensa_backtrace_user(regs, entry->max_stack,
			      callchain_trace, entry);
}
351
/* Dump PMG plus every counter's PM/PMCTRL/PMSTAT register on this CPU. */
void perf_event_print_debug(void)
{
	unsigned long flags;
	unsigned i;

	/* Keep the register snapshot consistent w.r.t. the PMU interrupt. */
	local_irq_save(flags);
	pr_info("CPU#%d: PMG: 0x%08lx\n", smp_processor_id(),
		get_er(XTENSA_PMU_PMG));
	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i)
		pr_info("PM%d: 0x%08lx, PMCTRL%d: 0x%08lx, PMSTAT%d: 0x%08lx\n",
			i, get_er(XTENSA_PMU_PM(i)),
			i, get_er(XTENSA_PMU_PMCTRL(i)),
			i, get_er(XTENSA_PMU_PMSTAT(i)));
	local_irq_restore(flags);
}
367
/*
 * PMU overflow interrupt handler: for every active counter that has
 * overflowed, acknowledge the status, fold the count into the event,
 * rearm the period and, when a full sampling period elapsed, deliver
 * a sample to the perf core.
 */
irqreturn_t xtensa_pmu_irq_handler(int irq, void *dev_id)
{
	irqreturn_t rc = IRQ_NONE;
	struct xtensa_pmu_events *ev = this_cpu_ptr(&xtensa_pmu_events);
	unsigned i;

	for_each_set_bit(i, ev->used_mask, XCHAL_NUM_PERF_COUNTERS) {
		uint32_t v = get_er(XTENSA_PMU_PMSTAT(i));
		struct perf_event *event = ev->event[i];
		struct hw_perf_event *hwc = &event->hw;
		u64 last_period;

		if (!(v & XTENSA_PMU_PMSTAT_OVFL))
			continue;

		/* Acknowledge by writing the status value back. */
		set_er(v, XTENSA_PMU_PMSTAT(i));
		xtensa_perf_event_update(event, hwc, i);
		/* Save before set_period() may change last_period. */
		last_period = hwc->last_period;
		if (xtensa_perf_event_set_period(event, hwc, i)) {
			struct perf_sample_data data;
			struct pt_regs *regs = get_irq_regs();

			perf_sample_data_init(&data, 0, last_period);
			/* Non-zero return means the core wants us to stop. */
			if (perf_event_overflow(event, &data, regs))
				xtensa_pmu_stop(event, 0);
		}

		rc = IRQ_HANDLED;
	}
	return rc;
}
399
/* PMU callbacks; registered as the "cpu" PMU in xtensa_pmu_init(). */
static struct pmu xtensa_pmu = {
	.pmu_enable = xtensa_pmu_enable,
	.pmu_disable = xtensa_pmu_disable,
	.event_init = xtensa_pmu_event_init,
	.add = xtensa_pmu_add,
	.del = xtensa_pmu_del,
	.start = xtensa_pmu_start,
	.stop = xtensa_pmu_stop,
	.read = xtensa_pmu_read,
};
410
411static int xtensa_pmu_setup(unsigned int cpu)
412{
413	unsigned i;
414
415	set_er(0, XTENSA_PMU_PMG);
416	for (i = 0; i < XCHAL_NUM_PERF_COUNTERS; ++i) {
417		set_er(0, XTENSA_PMU_PMCTRL(i));
418		set_er(get_er(XTENSA_PMU_PMSTAT(i)), XTENSA_PMU_PMSTAT(i));
419	}
420	return 0;
421}
422
423static int __init xtensa_pmu_init(void)
424{
425	int ret;
426	int irq = irq_create_mapping(NULL, XCHAL_PROFILING_INTERRUPT);
427
428	ret = cpuhp_setup_state(CPUHP_AP_PERF_XTENSA_STARTING,
429				"perf/xtensa:starting", xtensa_pmu_setup,
430				NULL);
431	if (ret) {
432		pr_err("xtensa_pmu: failed to register CPU-hotplug.\n");
433		return ret;
434	}
435#if XTENSA_FAKE_NMI
436	enable_irq(irq);
437#else
438	ret = request_irq(irq, xtensa_pmu_irq_handler, IRQF_PERCPU,
439			  "pmu", NULL);
440	if (ret < 0)
441		return ret;
442#endif
443
444	ret = perf_pmu_register(&xtensa_pmu, "cpu", PERF_TYPE_RAW);
445	if (ret)
446		free_irq(irq, NULL);
447
448	return ret;
449}
450early_initcall(xtensa_pmu_init);
451