// SPDX-License-Identifier: GPL-2.0
/*
 * Implement support for AMD Fam19h Branch Sampling feature
 * Based on specifications published in AMD PPR Fam19 Model 01
 *
 * Copyright 2021 Google LLC
 * Contributed by Stephane Eranian <eranian@google.com>
 */
#include <linux/kernel.h>
#include <linux/jump_label.h>
#include <asm/msr.h>
#include <asm/cpufeature.h>

#include "../perf_event.h"

#define BRS_POISON	0xFFFFFFFFFFFFFFFEULL /* mark limit of valid entries */

/* Debug Extension Configuration register layout */
union amd_debug_extn_cfg {
	__u64 val;
	struct {
		__u64	rsvd0:2,  /* reserved */
			brsmen:1, /* branch sample enable */
			rsvd4_3:2,/* reserved - must be 0x3 */
			vb:1,     /* valid branches recorded */
			rsvd2:10, /* reserved */
			msroff:4, /* index of next entry to write */
			rsvd3:4,  /* reserved */
			pmc:3,    /* #PMC holding the sampling event */
			rsvd4:37; /* reserved */
	};
};
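
/*
 * For reference, the bit layout implied by the bitfield above (an
 * editorial summary derived from the declaration, not copied from the PPR):
 *
 *   [1:0]   reserved
 *   [2]     brsmen - branch sample enable
 *   [4:3]   reserved, must be written as 11b
 *   [5]     vb     - valid branches recorded
 *   [15:6]  reserved
 *   [19:16] msroff - index of next entry to write
 *   [23:20] reserved
 *   [26:24] pmc    - #PMC holding the sampling event
 *   [63:27] reserved
 */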

static inline unsigned int brs_from(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx;
}

static inline unsigned int brs_to(int idx)
{
	return MSR_AMD_SAMP_BR_FROM + 2 * idx + 1;
}
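
/*
 * Editorial note: the FROM/TO MSRs are interleaved, i.e., entry 0 lives at
 * MSR_AMD_SAMP_BR_FROM + 0 (from) and + 1 (to), entry 1 at + 2 and + 3,
 * and so on, which is what the two helpers above compute.
 */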

static __always_inline void set_debug_extn_cfg(u64 val)
{
	/* bits[4:3] must always be set to 11b */
	__wrmsr(MSR_AMD_DBG_EXTN_CFG, val | 3ULL << 3, val >> 32);
}

static __always_inline u64 get_debug_extn_cfg(void)
{
	return __rdmsr(MSR_AMD_DBG_EXTN_CFG);
}

static bool __init amd_brs_detect(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return false;

	switch (boot_cpu_data.x86) {
	case 0x19: /* AMD Fam19h (Zen3) */
		x86_pmu.lbr_nr = 16;

		/* No hardware filtering supported */
		x86_pmu.lbr_sel_map = NULL;
		x86_pmu.lbr_sel_mask = 0;
		break;
	default:
		return false;
	}

	return true;
}

/*
 * The current BRS implementation does not support branch type or privilege
 * level filtering, so this function simply enforces those limitations; no
 * br_sel_map is needed. Software filtering is not supported because it would
 * not correlate well with a sampling period.
 */
static int amd_brs_setup_filter(struct perf_event *event)
{
	u64 type = event->attr.branch_sample_type;

	/* No BRS support */
	if (!x86_pmu.lbr_nr)
		return -EOPNOTSUPP;

	/* Can only capture all branches, i.e., no filtering */
	if ((type & ~PERF_SAMPLE_BRANCH_PLM_ALL) != PERF_SAMPLE_BRANCH_ANY)
		return -EINVAL;

	return 0;
}
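
/*
 * Illustrative examples of the check above (editorial, not exhaustive):
 * branch_sample_type = ANY, ANY|USER or ANY|KERNEL|HV is accepted, whereas
 * requesting a specific branch type, e.g. ANY_CALL or COND, is rejected
 * with -EINVAL because the hardware cannot filter.
 */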

static inline int amd_is_brs_event(struct perf_event *e)
{
	return (e->hw.config & AMD64_RAW_EVENT_MASK) == AMD_FAM19H_BRS_EVENT;
}

int amd_brs_hw_config(struct perf_event *event)
{
	int ret = 0;

	/*
	 * Because BRS holds the PMU interrupt until the branch buffer
	 * is full, counting mode is not supported.
	 */
	if (!is_sampling_event(event))
		return -EINVAL;

	/*
	 * Due to the way BRS operates by holding the interrupt until
	 * lbr_nr entries have been captured, it does not make sense
	 * to allow sampling on BRS with an event that does not match
	 * what BRS is capturing, i.e., retired taken branches.
	 * Otherwise the correlation with the event's period is even
	 * looser:
	 *
	 * With retired taken branch:
	 *   Effective P = P + 16 + X
	 * With any other event:
	 *   Effective P = P + Y + X
	 *
	 * Where X is the number of taken branches due to interrupt
	 * skid. Skid is large.
	 *
	 * Where Y is the occurrences of the event while BRS is
	 * capturing the lbr_nr entries.
	 *
	 * By using retired taken branches, we limit the impact on the
	 * Y variable. We know it cannot be more than the depth of
	 * BRS.
	 */
	if (!amd_is_brs_event(event))
		return -EINVAL;

	/*
	 * The BRS implementation does not work with frequency mode
	 * reprogramming of the period.
	 */
	if (event->attr.freq)
		return -EINVAL;
	/*
	 * The kernel subtracts the BRS depth from the period, so the
	 * period must be large enough.
	 */
	if (event->attr.sample_period <= x86_pmu.lbr_nr)
		return -EINVAL;
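
	/*
	 * For illustration: with a 16-deep BRS, attr.sample_period must be
	 * at least 17 so that the period remains positive once the BRS
	 * depth has been subtracted.
	 */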

	/*
	 * Check if we can allow PERF_SAMPLE_BRANCH_STACK
	 */
	ret = amd_brs_setup_filter(event);

	/* only set in case of success */
	if (!ret)
		event->hw.flags |= PERF_X86_EVENT_AMD_BRS;

	return ret;
}

/* tos = top of stack, i.e., last valid entry written */
static inline int amd_brs_get_tos(union amd_debug_extn_cfg *cfg)
{
	/*
	 * msroff is the index of the next entry to write, so the
	 * top-of-stack is one entry before it. When BRS is full,
	 * msroff wraps back to 0.
	 */
	return (cfg->msroff ? cfg->msroff : x86_pmu.lbr_nr) - 1;
}
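
/*
 * For illustration: msroff == 1 yields tos == 0, msroff == 5 yields
 * tos == 4, and msroff == 0 (buffer full, wrapped) yields
 * tos == x86_pmu.lbr_nr - 1, i.e., 15 on a 16-deep Fam19h BRS.
 */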

/*
 * Make sure we have a sane BRS offset to begin with,
 * especially with kexec.
 */
void amd_brs_reset(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_BRS))
		return;

	/*
	 * Reset config
	 */
	set_debug_extn_cfg(0);

	/*
	 * Mark first entry as poisoned
	 */
	wrmsrl(brs_to(0), BRS_POISON);
}

int __init amd_brs_init(void)
{
	if (!amd_brs_detect())
		return -EOPNOTSUPP;

	pr_cont("%d-deep BRS, ", x86_pmu.lbr_nr);

	return 0;
}

void amd_brs_enable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Activate only on first user */
	if (++cpuc->brs_active > 1)
		return;

	cfg.val    = 0; /* reset all fields */
	cfg.brsmen = 1; /* enable branch sampling */

	/* Set enable bit */
	set_debug_extn_cfg(cfg.val);
}

void amd_brs_enable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_enable();
}

void amd_brs_disable(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/* Check if active (could be disabled via x86_pmu_disable_all()) */
	if (!cpuc->brs_active)
		return;

	/* Only disable for last user */
	if (--cpuc->brs_active)
		return;

	/*
	 * Clear the brsmen bit but preserve the others as they contain
	 * useful state such as vb and msroff
	 */
	cfg.val = get_debug_extn_cfg();

	/*
	 * If we get here from the interrupt handler with BRS full, the
	 * hardware has already stopped BRS, so there is no need to
	 * issue the wrmsr again.
	 */
	if (cfg.brsmen) {
		cfg.brsmen = 0;
		set_debug_extn_cfg(cfg.val);
	}
}

void amd_brs_disable_all(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	if (cpuc->lbr_users)
		amd_brs_disable();
}

static bool amd_brs_match_plm(struct perf_event *event, u64 to)
{
	int type = event->attr.branch_sample_type;
	int plm_k = PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_HV;
	int plm_u = PERF_SAMPLE_BRANCH_USER;

	if (!(type & plm_k) && kernel_ip(to))
		return false;

	if (!(type & plm_u) && !kernel_ip(to))
		return false;

	return true;
}

/*
 * Caller must ensure amd_brs_inuse() is true before calling.
 * Returns nothing; the sampled branches are stored in cpuc->lbr_entries
 * and their count in cpuc->lbr_stack.nr.
 */
void amd_brs_drain(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_event *event = cpuc->events[0];
	struct perf_branch_entry *br = cpuc->lbr_entries;
	union amd_debug_extn_cfg cfg;
	u32 i, nr = 0, num, tos, start;
	u32 shift = 64 - boot_cpu_data.x86_virt_bits;

	/*
	 * The BRS event is forced on PMC0, so check if there is an
	 * event. It is possible to have lbr_users > 0 but the event
	 * not yet scheduled due to a long-latency PMU irq.
	 */
	if (!event)
		goto empty;

	cfg.val = get_debug_extn_cfg();

	/* Sanity check: msroff must be in [0, x86_pmu.lbr_nr) */
	if (WARN_ON_ONCE(cfg.msroff >= x86_pmu.lbr_nr))
		goto empty;

	/* No valid branch */
	if (cfg.vb == 0)
		goto empty;

	/*
	 * msroff points to the next entry to be written
	 * tos = most recent entry index = msroff - 1
	 * The BRS register buffer saturates, so we know we have
	 * start <= tos and that we have to read from start to tos.
	 */
	start = 0;
	tos = amd_brs_get_tos(&cfg);

	num = tos - start + 1;
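
	/*
	 * Illustrative example: with msroff == 5, tos == 4 and num == 5,
	 * so the loop below reads entries 4, 3, 2, 1, 0, i.e., most
	 * recent first.
	 */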

	/*
	 * BRS does only one pass (saturation) from MSROFF to depth-1.
	 * MSROFF wraps to zero when the buffer is full.
	 */
	for (i = 0; i < num; i++) {
		u32 brs_idx = tos - i;
		u64 from, to;

		rdmsrl(brs_to(brs_idx), to);

		/* Entry does not belong to us (as marked by kernel) */
		if (to == BRS_POISON)
			break;

		/*
		 * Sign-extend SAMP_BR_TO to 64 bits, bits 61-63 are reserved.
		 * Necessary to generate proper virtual addresses suitable for
		 * symbolization.
		 */
		to = (u64)(((s64)to << shift) >> shift);
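
		/*
		 * For illustration: with 48-bit virtual addresses,
		 * shift == 16, so a raw value of 0x0000ffff81234567
		 * becomes 0xffffffff81234567 after the sign extension
		 * (the example address is made up).
		 */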

		if (!amd_brs_match_plm(event, to))
			continue;

		rdmsrl(brs_from(brs_idx), from);

		perf_clear_branch_entry_bitfields(br+nr);

		br[nr].from = from;
		br[nr].to   = to;

		nr++;
	}
empty:
	/* Record number of sampled branches */
	cpuc->lbr_stack.nr = nr;
}

/*
 * Poison the most recent entry to prevent reuse by the next task;
 * required because BRS entries are not tagged by PID.
 */
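/*
 * Editorial note: amd_brs_drain() stops at the first BRS_POISON target it
 * finds, so entries recorded before the poisoned slot (i.e., by the
 * previous task) are never reported.
 */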
static void amd_brs_poison_buffer(void)
{
	union amd_debug_extn_cfg cfg;
	unsigned int idx;

	/* Get current state */
	cfg.val = get_debug_extn_cfg();

	/* idx is most recently written entry */
	idx = amd_brs_get_tos(&cfg);

	/* Poison target of entry */
	wrmsrl(brs_to(idx), BRS_POISON);
}

/*
 * On context switch in, we need to make sure no samples from the previous
 * user are left in the BRS.
 *
 * On ctxswin, sched_in = true; called after the PMU has started.
 * On ctxswout, sched_in = false; called before the PMU is stopped.
 */
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

	/* no active users */
	if (!cpuc->lbr_users)
		return;

	/*
	 * On context switch in, we need to ensure we do not use entries
	 * from the previous BRS user on this CPU, so we poison the buffer,
	 * which is faster than resetting all entries.
	 */
	if (sched_in)
		amd_brs_poison_buffer();
}

/*
 * Called from ACPI processor_idle.c or acpi_pad.c with interrupts disabled.
 */
void noinstr perf_amd_brs_lopwr_cb(bool lopwr_in)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	union amd_debug_extn_cfg cfg;

	/*
	 * On mwait in, we may end up in a non-C0 state. We must disable
	 * branch sampling to avoid holding the NMI pending for too long.
	 * We disable it in hardware but keep the state in cpuc so we can
	 * re-enable it.
	 *
	 * The hardware will deliver the NMI, if needed, once brsmen is
	 * cleared.
	 */
	if (cpuc->brs_active) {
		cfg.val = get_debug_extn_cfg();
		cfg.brsmen = !lopwr_in;
		set_debug_extn_cfg(cfg.val);
	}
}

DEFINE_STATIC_CALL_NULL(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
EXPORT_STATIC_CALL_TRAMP_GPL(perf_lopwr_cb);

void __init amd_brs_lopwr_init(void)
{
	static_call_update(perf_lopwr_cb, perf_amd_brs_lopwr_cb);
}