// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * nd_perf.c: NVDIMM Device Performance Monitoring Unit support
 *
 * Perf interface to expose nvdimm performance stats.
 *
 * Copyright (C) 2021 IBM Corporation
 */

#define pr_fmt(fmt) "nvdimm_pmu: " fmt

#include <linux/nd.h>
#include <linux/platform_device.h>

#define EVENT(_name, _code)     enum{_name = _code}

/*
 * NVDIMM event codes.
 */

/* Controller Reset Count */
EVENT(CTL_RES_CNT,		0x1);
/* Controller Reset Elapsed Time */
EVENT(CTL_RES_TM,		0x2);
/* Power-on Seconds */
EVENT(POWERON_SECS,		0x3);
/* Life Remaining */
EVENT(MEM_LIFE,		0x4);
/* Critical Resource Utilization */
EVENT(CRI_RES_UTIL,		0x5);
/* Host Load Count */
EVENT(HOST_L_CNT,		0x6);
/* Host Store Count */
EVENT(HOST_S_CNT,		0x7);
/* Host Store Duration */
EVENT(HOST_S_DUR,		0x8);
/* Host Load Duration */
EVENT(HOST_L_DUR,		0x9);
/* Media Read Count */
EVENT(MED_R_CNT,		0xa);
/* Media Write Count */
EVENT(MED_W_CNT,		0xb);
/* Media Read Duration */
EVENT(MED_R_DUR,		0xc);
/* Media Write Duration */
EVENT(MED_W_DUR,		0xd);
/* Cache Read Hit Count */
EVENT(CACHE_RH_CNT,		0xe);
/* Cache Write Hit Count */
EVENT(CACHE_WH_CNT,		0xf);
/* Fast Write Count */
EVENT(FAST_W_CNT,		0x10);

NVDIMM_EVENT_ATTR(ctl_res_cnt,		CTL_RES_CNT);
NVDIMM_EVENT_ATTR(ctl_res_tm,		CTL_RES_TM);
NVDIMM_EVENT_ATTR(poweron_secs,		POWERON_SECS);
NVDIMM_EVENT_ATTR(mem_life,		MEM_LIFE);
NVDIMM_EVENT_ATTR(cri_res_util,		CRI_RES_UTIL);
NVDIMM_EVENT_ATTR(host_l_cnt,		HOST_L_CNT);
NVDIMM_EVENT_ATTR(host_s_cnt,		HOST_S_CNT);
NVDIMM_EVENT_ATTR(host_s_dur,		HOST_S_DUR);
NVDIMM_EVENT_ATTR(host_l_dur,		HOST_L_DUR);
NVDIMM_EVENT_ATTR(med_r_cnt,		MED_R_CNT);
NVDIMM_EVENT_ATTR(med_w_cnt,		MED_W_CNT);
NVDIMM_EVENT_ATTR(med_r_dur,		MED_R_DUR);
NVDIMM_EVENT_ATTR(med_w_dur,		MED_W_DUR);
NVDIMM_EVENT_ATTR(cache_rh_cnt,		CACHE_RH_CNT);
NVDIMM_EVENT_ATTR(cache_wh_cnt,		CACHE_WH_CNT);
NVDIMM_EVENT_ATTR(fast_w_cnt,		FAST_W_CNT);

static struct attribute *nvdimm_events_attr[] = {
	NVDIMM_EVENT_PTR(CTL_RES_CNT),
	NVDIMM_EVENT_PTR(CTL_RES_TM),
	NVDIMM_EVENT_PTR(POWERON_SECS),
	NVDIMM_EVENT_PTR(MEM_LIFE),
	NVDIMM_EVENT_PTR(CRI_RES_UTIL),
	NVDIMM_EVENT_PTR(HOST_L_CNT),
	NVDIMM_EVENT_PTR(HOST_S_CNT),
	NVDIMM_EVENT_PTR(HOST_S_DUR),
	NVDIMM_EVENT_PTR(HOST_L_DUR),
	NVDIMM_EVENT_PTR(MED_R_CNT),
	NVDIMM_EVENT_PTR(MED_W_CNT),
	NVDIMM_EVENT_PTR(MED_R_DUR),
	NVDIMM_EVENT_PTR(MED_W_DUR),
	NVDIMM_EVENT_PTR(CACHE_RH_CNT),
	NVDIMM_EVENT_PTR(CACHE_WH_CNT),
	NVDIMM_EVENT_PTR(FAST_W_CNT),
	NULL
};

static struct attribute_group nvdimm_pmu_events_group = {
	.name = "events",
	.attrs = nvdimm_events_attr,
};

PMU_FORMAT_ATTR(event, "config:0-4");

static struct attribute *nvdimm_pmu_format_attr[] = {
	&format_attr_event.attr,
	NULL,
};

static struct attribute_group nvdimm_pmu_format_group = {
	.name = "format",
	.attrs = nvdimm_pmu_format_attr,
};

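/*
 * Sysfs "show" callback used by the NVDIMM_EVENT_ATTR() entries above:
 * prints the event encoding (e.g. "event=0x01") for each event file
 * under the PMU's events/ directory.
 */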
ssize_t nvdimm_events_sysfs_show(struct device *dev,
				 struct device_attribute *attr, char *page)
{
	struct perf_pmu_events_attr *pmu_attr;

	pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);

	return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
}

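/*
 * Sysfs "show" callback for the cpumask attribute: reports the CPU
 * currently designated for counter access on this nvdimm PMU.
 */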
static ssize_t nvdimm_pmu_cpumask_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct pmu *pmu = dev_get_drvdata(dev);
	struct nvdimm_pmu *nd_pmu;

	nd_pmu = container_of(pmu, struct nvdimm_pmu, pmu);

	return cpumap_print_to_pagebuf(true, buf, cpumask_of(nd_pmu->cpu));
}

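/*
 * CPU hotplug "offline" callback: if the CPU going down is the one
 * designated for counter access, pick a new target CPU and migrate
 * the perf context to it.
 */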
static int nvdimm_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct nvdimm_pmu *nd_pmu;
	u32 target;
	int nodeid;
	const struct cpumask *cpumask;

	nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);

	/* Clear it, in case the given cpu is set in nd_pmu->arch_cpumask */
	cpumask_test_and_clear_cpu(cpu, &nd_pmu->arch_cpumask);

	/*
	 * If the given cpu is not the current designated cpu for
	 * counter access, just return.
	 */
	if (cpu != nd_pmu->cpu)
		return 0;

	/* Check for any active cpu in nd_pmu->arch_cpumask */
	target = cpumask_any(&nd_pmu->arch_cpumask);

	/*
	 * In case we don't have any active cpu in nd_pmu->arch_cpumask,
	 * check the given cpu's numa node list.
	 */
	if (target >= nr_cpu_ids) {
		nodeid = cpu_to_node(cpu);
		cpumask = cpumask_of_node(nodeid);
		target = cpumask_any_but(cpumask, cpu);
	}
	nd_pmu->cpu = target;

	/* Migrate nvdimm pmu events to the new target cpu if valid */
	if (target >= 0 && target < nr_cpu_ids)
		perf_pmu_migrate_context(&nd_pmu->pmu, cpu, target);

	return 0;
}

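/*
 * CPU hotplug "online" callback: if no valid CPU is currently designated
 * for counter access, adopt the newly onlined CPU.
 */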
static int nvdimm_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct nvdimm_pmu *nd_pmu;

	nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);

	if (nd_pmu->cpu >= nr_cpu_ids)
		nd_pmu->cpu = cpu;

	return 0;
}

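/*
 * Allocate and wire up the "cpumask" sysfs attribute group so that
 * userspace tools (e.g. perf) can discover which CPU to open events on.
 */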
static int create_cpumask_attr_group(struct nvdimm_pmu *nd_pmu)
{
	struct perf_pmu_events_attr *pmu_events_attr;
	struct attribute **attrs_group;
	struct attribute_group *nvdimm_pmu_cpumask_group;

	pmu_events_attr = kzalloc(sizeof(*pmu_events_attr), GFP_KERNEL);
	if (!pmu_events_attr)
		return -ENOMEM;

	attrs_group = kzalloc(2 * sizeof(struct attribute *), GFP_KERNEL);
	if (!attrs_group) {
		kfree(pmu_events_attr);
		return -ENOMEM;
	}

	/* Allocate memory for the cpumask attribute group */
	nvdimm_pmu_cpumask_group = kzalloc(sizeof(*nvdimm_pmu_cpumask_group), GFP_KERNEL);
	if (!nvdimm_pmu_cpumask_group) {
		kfree(pmu_events_attr);
		kfree(attrs_group);
		return -ENOMEM;
	}

	sysfs_attr_init(&pmu_events_attr->attr.attr);
	pmu_events_attr->attr.attr.name = "cpumask";
	pmu_events_attr->attr.attr.mode = 0444;
	pmu_events_attr->attr.show = nvdimm_pmu_cpumask_show;
	attrs_group[0] = &pmu_events_attr->attr.attr;
	attrs_group[1] = NULL;

	nvdimm_pmu_cpumask_group->attrs = attrs_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR] = nvdimm_pmu_cpumask_group;
	return 0;
}

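/*
 * Choose an initial CPU for counter access and register this PMU
 * instance with the CPU hotplug framework so the designated CPU is
 * updated as CPUs come and go.
 */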
static int nvdimm_pmu_cpu_hotplug_init(struct nvdimm_pmu *nd_pmu)
{
	int nodeid, rc;
	const struct cpumask *cpumask;

	/*
	 * For the cpu hotplug feature, arch specific code can provide
	 * the required cpumask which is used to pick the designated
	 * cpu for counter access.
	 * Check for any active cpu in nd_pmu->arch_cpumask.
	 */
	if (!cpumask_empty(&nd_pmu->arch_cpumask)) {
		nd_pmu->cpu = cpumask_any(&nd_pmu->arch_cpumask);
	} else {
		/* Pick an active cpu from the cpumask of the device numa node. */
		nodeid = dev_to_node(nd_pmu->dev);
		cpumask = cpumask_of_node(nodeid);
		nd_pmu->cpu = cpumask_any(cpumask);
	}

	rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/nvdimm:online",
				     nvdimm_pmu_cpu_online, nvdimm_pmu_cpu_offline);

	if (rc < 0)
		return rc;

	nd_pmu->cpuhp_state = rc;

	/* Register the pmu instance for cpu hotplug */
	rc = cpuhp_state_add_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
	if (rc) {
		cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
		return rc;
	}

	/* Create the cpumask attribute group */
	rc = create_cpumask_attr_group(nd_pmu);
	if (rc) {
		cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
		cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
		return rc;
	}

	return 0;
}

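/*
 * Undo nvdimm_pmu_cpu_hotplug_init(): drop the hotplug instance/state
 * and free the dynamically allocated cpumask attribute group.
 */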
static void nvdimm_pmu_free_hotplug_memory(struct nvdimm_pmu *nd_pmu)
{
	cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
	cpuhp_remove_multi_state(nd_pmu->cpuhp_state);

	if (nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR])
		kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]->attrs);
	kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]);
}

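/**
 * register_nvdimm_pmu() - register an nvdimm performance monitoring unit
 * @nd_pmu: nvdimm pmu descriptor with the event callbacks and pmu name filled in
 * @pdev: platform device backing this nvdimm pmu
 *
 * Sets up the format, events and cpumask attribute groups, hooks the pmu
 * into cpu hotplug and registers it with the perf core.
 *
 * Return: 0 on success, negative errno otherwise.
 */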
int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev)
{
	int rc;

	if (!nd_pmu || !pdev)
		return -EINVAL;

	/* Event functions like add/del/read/event_init and the pmu name must not be NULL */
	if (WARN_ON_ONCE(!(nd_pmu->pmu.event_init && nd_pmu->pmu.add &&
			   nd_pmu->pmu.del && nd_pmu->pmu.read && nd_pmu->pmu.name)))
		return -EINVAL;

	nd_pmu->pmu.attr_groups = kzalloc((NVDIMM_PMU_NULL_ATTR + 1) *
					  sizeof(struct attribute_group *), GFP_KERNEL);
	if (!nd_pmu->pmu.attr_groups)
		return -ENOMEM;

	/*
	 * Add the platform_device->dev pointer to nvdimm_pmu to access
	 * device data in the event functions.
	 */
	nd_pmu->dev = &pdev->dev;

	/* Fill attribute groups for the nvdimm pmu device */
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_FORMAT_ATTR] = &nvdimm_pmu_format_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_EVENT_ATTR] = &nvdimm_pmu_events_group;
	nd_pmu->pmu.attr_groups[NVDIMM_PMU_NULL_ATTR] = NULL;

	/* Fill the attribute group for cpumask */
	rc = nvdimm_pmu_cpu_hotplug_init(nd_pmu);
	if (rc) {
		pr_info("cpu hotplug feature failed for device: %s\n", nd_pmu->pmu.name);
		kfree(nd_pmu->pmu.attr_groups);
		return rc;
	}

	rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->pmu.name, -1);
	if (rc) {
		nvdimm_pmu_free_hotplug_memory(nd_pmu);
		kfree(nd_pmu->pmu.attr_groups);
		return rc;
	}

	pr_info("%s NVDIMM performance monitor support registered\n",
		nd_pmu->pmu.name);

	return 0;
}
EXPORT_SYMBOL_GPL(register_nvdimm_pmu);

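/**
 * unregister_nvdimm_pmu() - unregister an nvdimm performance monitoring unit
 * @nd_pmu: nvdimm pmu descriptor previously registered with register_nvdimm_pmu()
 *
 * Unregisters the pmu from the perf core, tears down the cpu hotplug state
 * and frees the memory allocated during registration, including @nd_pmu itself.
 */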
void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu)
{
	perf_pmu_unregister(&nd_pmu->pmu);
	nvdimm_pmu_free_hotplug_memory(nd_pmu);
	kfree(nd_pmu->pmu.attr_groups);
	kfree(nd_pmu);
}
EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu);

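/*
 * Illustrative usage (not part of this interface): a minimal sketch of how
 * a platform driver might register an nvdimm PMU, assuming hypothetical
 * foo_* callbacks that implement the perf event_init/add/del/read hooks by
 * reading counters from the device.
 *
 *	static int foo_nvdimm_probe(struct platform_device *pdev)
 *	{
 *		struct nvdimm_pmu *nd_pmu;
 *
 *		nd_pmu = kzalloc(sizeof(*nd_pmu), GFP_KERNEL);
 *		if (!nd_pmu)
 *			return -ENOMEM;
 *
 *		nd_pmu->pmu.task_ctx_nr = perf_invalid_context;
 *		nd_pmu->pmu.name = "nmem0";
 *		nd_pmu->pmu.event_init = foo_event_init;
 *		nd_pmu->pmu.add = foo_event_add;
 *		nd_pmu->pmu.del = foo_event_del;
 *		nd_pmu->pmu.read = foo_event_read;
 *
 *		return register_nvdimm_pmu(nd_pmu, pdev);
 *	}
 *
 * On driver removal the matching call would be unregister_nvdimm_pmu(nd_pmu),
 * which also frees nd_pmu.
 */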