1// SPDX-License-Identifier: GPL-2.0
2#include <linux/perf_event.h>
3#include <asm/perf_event.h>
4
5#include "../perf_event.h"
6
/* LBR Branch Select valid bits */
#define LBR_SELECT_MASK		0x1ff

/*
 * LBR Branch Select filter bits which when set, ensures that the
 * corresponding type of branches are not recorded
 */
#define LBR_SELECT_KERNEL		0	/* Branches ending in CPL = 0 */
#define LBR_SELECT_USER			1	/* Branches ending in CPL > 0 */
#define LBR_SELECT_JCC			2	/* Conditional branches */
#define LBR_SELECT_CALL_NEAR_REL	3	/* Near relative calls */
#define LBR_SELECT_CALL_NEAR_IND	4	/* Indirect relative calls */
#define LBR_SELECT_RET_NEAR		5	/* Near returns */
#define LBR_SELECT_JMP_NEAR_IND		6	/* Near indirect jumps (excl. calls and returns) */
#define LBR_SELECT_JMP_NEAR_REL		7	/* Near relative jumps (excl. calls) */
#define LBR_SELECT_FAR_BRANCH		8	/* Far branches */

/* Single-bit masks corresponding to the bit positions above */
#define LBR_KERNEL	BIT(LBR_SELECT_KERNEL)
#define LBR_USER	BIT(LBR_SELECT_USER)
#define LBR_JCC		BIT(LBR_SELECT_JCC)
#define LBR_REL_CALL	BIT(LBR_SELECT_CALL_NEAR_REL)
#define LBR_IND_CALL	BIT(LBR_SELECT_CALL_NEAR_IND)
#define LBR_RETURN	BIT(LBR_SELECT_RET_NEAR)
#define LBR_REL_JMP	BIT(LBR_SELECT_JMP_NEAR_REL)
#define LBR_IND_JMP	BIT(LBR_SELECT_JMP_NEAR_IND)
#define LBR_FAR		BIT(LBR_SELECT_FAR_BRANCH)
#define LBR_NOT_SUPP	-1	/* unsupported filter */
#define LBR_IGNORE	0

/* All branch type bits except the privilege-level selectors */
#define LBR_ANY		\
	(LBR_JCC | LBR_REL_CALL | LBR_IND_CALL | LBR_RETURN |	\
	 LBR_REL_JMP | LBR_IND_JMP | LBR_FAR)
39
/*
 * Layout of a single raw branch record as read from the
 * MSR_AMD_SAMP_BR_FROM + 2 * idx (from) and + 2 * idx + 1 (to) MSR
 * pair (see the accessors in this file). The 58-bit ip fields are
 * widened to canonical addresses by sign_ext_branch_ip().
 */
struct branch_entry {
	union {
		struct {
			u64	ip:58;		/* branch source address */
			u64	ip_sign_ext:5;	/* sign-extension of ip */
			u64	mispredict:1;	/* branch was mispredicted */
		} split;
		u64		full;
	} from;

	union {
		struct {
			u64	ip:58;		/* branch target address */
			u64	ip_sign_ext:3;	/* sign-extension of ip */
			u64	reserved:1;	/* set on erroneous record (Erratum 1452) */
			u64	spec:1;		/* branch was speculative */
			u64	valid:1;	/* record contains a valid branch */
		} split;
		u64		full;
	} to;
};
61
62static __always_inline void amd_pmu_lbr_set_from(unsigned int idx, u64 val)
63{
64	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
65}
66
67static __always_inline void amd_pmu_lbr_set_to(unsigned int idx, u64 val)
68{
69	wrmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
70}
71
72static __always_inline u64 amd_pmu_lbr_get_from(unsigned int idx)
73{
74	u64 val;
75
76	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2, val);
77
78	return val;
79}
80
81static __always_inline u64 amd_pmu_lbr_get_to(unsigned int idx)
82{
83	u64 val;
84
85	rdmsrl(MSR_AMD_SAMP_BR_FROM + idx * 2 + 1, val);
86
87	return val;
88}
89
90static __always_inline u64 sign_ext_branch_ip(u64 ip)
91{
92	u32 shift = 64 - boot_cpu_data.x86_virt_bits;
93
94	return (u64)(((s64)ip << shift) >> shift);
95}
96
/*
 * Software post-processing of the branch records gathered by
 * amd_pmu_lbr_read(): classify each record, adjust for instruction
 * fusion, drop records whose type was not requested (cpuc->br_sel)
 * and compact the remaining entries.
 */
static void amd_pmu_lbr_filter(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	int br_sel = cpuc->br_sel, offset, type, i, j;
	bool compress = false;
	bool fused_only = false;
	u64 from, to;

	/* If sampling all branches, there is nothing to filter */
	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
		fused_only = true;

	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
		from = cpuc->lbr_entries[i].from;
		to = cpuc->lbr_entries[i].to;
		/* offset is non-zero only when fusion was detected */
		type = branch_type_fused(from, to, 0, &offset);

		/*
		 * Adjust the branch from address in case of instruction
		 * fusion where it points to an instruction preceding the
		 * actual branch
		 */
		if (offset) {
			cpuc->lbr_entries[i].from += offset;
			/* No type filtering needed when sampling everything */
			if (fused_only)
				continue;
		}

		/* If type does not correspond, then discard */
		if (type == X86_BR_NONE || (br_sel & type) != type) {
			cpuc->lbr_entries[i].from = 0;	/* mark invalid */
			compress = true;
		}

		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
			cpuc->lbr_entries[i].type = common_branch_type(type);
	}

	/* Nothing was discarded, entries are already contiguous */
	if (!compress)
		return;

	/* Remove all invalid entries */
	for (i = 0; i < cpuc->lbr_stack.nr; ) {
		if (!cpuc->lbr_entries[i].from) {
			/* Shift the tail left by one to overwrite slot i */
			j = i;
			while (++j < cpuc->lbr_stack.nr)
				cpuc->lbr_entries[j - 1] = cpuc->lbr_entries[j];
			cpuc->lbr_stack.nr--;
			/* Slot i may now hold another invalid entry; recheck it */
			if (!cpuc->lbr_entries[i].from)
				continue;
		}
		i++;
	}
}
152
/*
 * Maps the (valid << 1) | spec encoding of a record's "to" field to
 * the perf speculation outcome; indexed by amd_pmu_lbr_read().
 */
static const int lbr_spec_map[PERF_BR_SPEC_MAX] = {
	PERF_BR_SPEC_NA,			/* valid = 0, spec = 0 */
	PERF_BR_SPEC_WRONG_PATH,		/* valid = 0, spec = 1 */
	PERF_BR_NON_SPEC_CORRECT_PATH,		/* valid = 1, spec = 0 */
	PERF_BR_SPEC_CORRECT_PATH,		/* valid = 1, spec = 1 */
};
159
/*
 * Read all hardware branch records into cpuc->lbr_entries, skipping
 * empty or erroneous slots, then hand off to amd_pmu_lbr_filter()
 * for software filtering.
 */
void amd_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
	struct perf_branch_entry *br = cpuc->lbr_entries;
	struct branch_entry entry;
	int out = 0, idx, i;

	/* Nothing to read if no event is currently using LBR */
	if (!cpuc->lbr_users)
		return;

	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		entry.from.full	= amd_pmu_lbr_get_from(i);
		entry.to.full	= amd_pmu_lbr_get_to(i);

		/*
		 * Check if a branch has been logged; if valid = 0, spec = 0
		 * then no branch was recorded; if reserved = 1 then an
		 * erroneous branch was recorded (see Erratum 1452)
		 */
		if ((!entry.to.split.valid && !entry.to.split.spec) ||
		    entry.to.split.reserved)
			continue;

		perf_clear_branch_entry_bitfields(br + out);

		/* Widen the 58-bit hardware addresses to canonical form */
		br[out].from	= sign_ext_branch_ip(entry.from.split.ip);
		br[out].to	= sign_ext_branch_ip(entry.to.split.ip);
		br[out].mispred	= entry.from.split.mispredict;
		br[out].predicted = !br[out].mispred;

		/*
		 * Set branch speculation information using the status of
		 * the valid and spec bits.
		 *
		 * When valid = 0, spec = 0, no branch was recorded and the
		 * entry is discarded as seen above.
		 *
		 * When valid = 0, spec = 1, the recorded branch was
		 * speculative but took the wrong path.
		 *
		 * When valid = 1, spec = 0, the recorded branch was
		 * non-speculative but took the correct path.
		 *
		 * When valid = 1, spec = 1, the recorded branch was
		 * speculative and took the correct path
		 */
		idx = (entry.to.split.valid << 1) | entry.to.split.spec;
		br[out].spec = lbr_spec_map[idx];
		out++;
	}

	cpuc->lbr_stack.nr = out;

	/*
	 * Internal register renaming always ensures that LBR From[0] and
	 * LBR To[0] always represent the TOS
	 */
	cpuc->lbr_stack.hw_idx = 0;

	/* Perform further software filtering */
	amd_pmu_lbr_filter();
}
222
/*
 * Translation of PERF_SAMPLE_BRANCH_* bit positions to LBR_SELECT
 * filter bits. LBR_NOT_SUPP entries cause event setup to fail with
 * -EOPNOTSUPP; entries left unset default to 0 (LBR_IGNORE).
 */
static const int lbr_select_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGNORE,

	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL | LBR_FAR,
	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
	[PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IN_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_TX_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,

	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
	[PERF_SAMPLE_BRANCH_CALL_SHIFT]		= LBR_REL_CALL,

	[PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT]	= LBR_NOT_SUPP,
	[PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT]	= LBR_NOT_SUPP,
};
244
245static int amd_pmu_lbr_setup_filter(struct perf_event *event)
246{
247	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
248	u64 br_type = event->attr.branch_sample_type;
249	u64 mask = 0, v;
250	int i;
251
252	/* No LBR support */
253	if (!x86_pmu.lbr_nr)
254		return -EOPNOTSUPP;
255
256	if (br_type & PERF_SAMPLE_BRANCH_USER)
257		mask |= X86_BR_USER;
258
259	if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
260		mask |= X86_BR_KERNEL;
261
262	/* Ignore BRANCH_HV here */
263
264	if (br_type & PERF_SAMPLE_BRANCH_ANY)
265		mask |= X86_BR_ANY;
266
267	if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
268		mask |= X86_BR_ANY_CALL;
269
270	if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
271		mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
272
273	if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
274		mask |= X86_BR_IND_CALL;
275
276	if (br_type & PERF_SAMPLE_BRANCH_COND)
277		mask |= X86_BR_JCC;
278
279	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
280		mask |= X86_BR_IND_JMP;
281
282	if (br_type & PERF_SAMPLE_BRANCH_CALL)
283		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
284
285	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
286		mask |= X86_BR_TYPE_SAVE;
287
288	reg->reg = mask;
289	mask = 0;
290
291	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
292		if (!(br_type & BIT_ULL(i)))
293			continue;
294
295		v = lbr_select_map[i];
296		if (v == LBR_NOT_SUPP)
297			return -EOPNOTSUPP;
298
299		if (v != LBR_IGNORE)
300			mask |= v;
301	}
302
303	/* Filter bits operate in suppress mode */
304	reg->config = mask ^ LBR_SELECT_MASK;
305
306	return 0;
307}
308
309int amd_pmu_lbr_hw_config(struct perf_event *event)
310{
311	int ret = 0;
312
313	/* LBR is not recommended in counting mode */
314	if (!is_sampling_event(event))
315		return -EINVAL;
316
317	ret = amd_pmu_lbr_setup_filter(event);
318	if (!ret)
319		event->attach_state |= PERF_ATTACH_SCHED_CB;
320
321	return ret;
322}
323
324void amd_pmu_lbr_reset(void)
325{
326	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
327	int i;
328
329	if (!x86_pmu.lbr_nr)
330		return;
331
332	/* Reset all branch records individually */
333	for (i = 0; i < x86_pmu.lbr_nr; i++) {
334		amd_pmu_lbr_set_from(i, 0);
335		amd_pmu_lbr_set_to(i, 0);
336	}
337
338	cpuc->last_task_ctx = NULL;
339	cpuc->last_log_id = 0;
340	wrmsrl(MSR_AMD64_LBR_SELECT, 0);
341}
342
343void amd_pmu_lbr_add(struct perf_event *event)
344{
345	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
346	struct hw_perf_event_extra *reg = &event->hw.branch_reg;
347
348	if (!x86_pmu.lbr_nr)
349		return;
350
351	if (has_branch_stack(event)) {
352		cpuc->lbr_select = 1;
353		cpuc->lbr_sel->config = reg->config;
354		cpuc->br_sel = reg->reg;
355	}
356
357	perf_sched_cb_inc(event->pmu);
358
359	if (!cpuc->lbr_users++ && !event->total_time_running)
360		amd_pmu_lbr_reset();
361}
362
363void amd_pmu_lbr_del(struct perf_event *event)
364{
365	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
366
367	if (!x86_pmu.lbr_nr)
368		return;
369
370	if (has_branch_stack(event))
371		cpuc->lbr_select = 0;
372
373	cpuc->lbr_users--;
374	WARN_ON_ONCE(cpuc->lbr_users < 0);
375	perf_sched_cb_dec(event->pmu);
376}
377
378void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
379{
380	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
381
382	/*
383	 * A context switch can flip the address space and LBR entries are
384	 * not tagged with an identifier. Hence, branches cannot be resolved
385	 * from the old address space and the LBR records should be wiped.
386	 */
387	if (cpuc->lbr_users && sched_in)
388		amd_pmu_lbr_reset();
389}
390
391void amd_pmu_lbr_enable_all(void)
392{
393	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
394	u64 lbr_select, dbg_ctl, dbg_extn_cfg;
395
396	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
397		return;
398
399	/* Set hardware branch filter */
400	if (cpuc->lbr_select) {
401		lbr_select = cpuc->lbr_sel->config & LBR_SELECT_MASK;
402		wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
403	}
404
405	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
406		rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
407		wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
408	}
409
410	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
411	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
412}
413
414void amd_pmu_lbr_disable_all(void)
415{
416	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
417	u64 dbg_ctl, dbg_extn_cfg;
418
419	if (!cpuc->lbr_users || !x86_pmu.lbr_nr)
420		return;
421
422	rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
423	wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
424
425	if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
426		rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
427		wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
428	}
429}
430
431__init int amd_pmu_lbr_init(void)
432{
433	union cpuid_0x80000022_ebx ebx;
434
435	if (x86_pmu.version < 2 || !boot_cpu_has(X86_FEATURE_AMD_LBR_V2))
436		return -EOPNOTSUPP;
437
438	/* Set number of entries */
439	ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES);
440	x86_pmu.lbr_nr = ebx.split.lbr_v2_stack_sz;
441
442	pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
443
444	return 0;
445}
446