1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2// Copyright (c) 2018 Mellanox Technologies
3
4#include <linux/mlx5/driver.h>
5
6#include "mlx5_core.h"
7#include "lib/eq.h"
8#include "lib/events.h"
9
10struct mlx5_event_nb {
11	struct mlx5_nb  nb;
12	void           *ctx;
13};
14
/* General event handlers for the low-level mlx5_core driver.
 *
 * Other major feature-specific events, such as clock/eswitch/fpga/FW trace
 * and many others, are handled elsewhere by the mlx5 components that own
 * them, each through its own separate notifier callback.
 */
/* Forward declarations: the notifier table below references these handlers
 * before their definitions. Parameter names are spelled out so that the
 * prototypes read the same way as the definitions.
 */
static int any_notifier(struct notifier_block *nb, unsigned long type, void *data);
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data);
static int port_module(struct notifier_block *nb, unsigned long type, void *data);
static int pcie_core(struct notifier_block *nb, unsigned long type, void *data);

/* handler which forwards the event to events->fw_nh, driver notifiers */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data);
28
29static struct mlx5_nb events_nbs_ref[] = {
30	/* Events to be processed by mlx5_core */
31	{.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
32	{.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
33	{.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
34	{.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
35
36	/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
37	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
38	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
39	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_OBJECT_CHANGE },
40	/* QP/WQ resource events to forward */
41	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
42	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
43	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
44	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
45	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
46	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
47	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
48	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
49	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
50	/* SRQ events */
51	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
52	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
53};
54
55struct mlx5_events {
56	struct mlx5_core_dev *dev;
57	struct workqueue_struct *wq;
58	struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
59	/* driver notifier chain for fw events */
60	struct atomic_notifier_head fw_nh;
61	/* port module events stats */
62	struct mlx5_pme_stats pme_stats;
63	/*pcie_core*/
64	struct work_struct pcie_core_work;
65	/* driver notifier chain for sw events */
66	struct blocking_notifier_head sw_nh;
67};
68
69static const char *eqe_type_str(u8 type)
70{
71	switch (type) {
72	case MLX5_EVENT_TYPE_COMP:
73		return "MLX5_EVENT_TYPE_COMP";
74	case MLX5_EVENT_TYPE_PATH_MIG:
75		return "MLX5_EVENT_TYPE_PATH_MIG";
76	case MLX5_EVENT_TYPE_COMM_EST:
77		return "MLX5_EVENT_TYPE_COMM_EST";
78	case MLX5_EVENT_TYPE_SQ_DRAINED:
79		return "MLX5_EVENT_TYPE_SQ_DRAINED";
80	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
81		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
82	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
83		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
84	case MLX5_EVENT_TYPE_CQ_ERROR:
85		return "MLX5_EVENT_TYPE_CQ_ERROR";
86	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
87		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
88	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
89		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
90	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
91		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
92	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
93		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
94	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
95		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
96	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
97		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
98	case MLX5_EVENT_TYPE_PORT_CHANGE:
99		return "MLX5_EVENT_TYPE_PORT_CHANGE";
100	case MLX5_EVENT_TYPE_GPIO_EVENT:
101		return "MLX5_EVENT_TYPE_GPIO_EVENT";
102	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
103		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
104	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
105		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
106	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
107		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
108	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
109		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
110	case MLX5_EVENT_TYPE_STALL_EVENT:
111		return "MLX5_EVENT_TYPE_STALL_EVENT";
112	case MLX5_EVENT_TYPE_CMD:
113		return "MLX5_EVENT_TYPE_CMD";
114	case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
115		return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
116	case MLX5_EVENT_TYPE_VHCA_STATE_CHANGE:
117		return "MLX5_EVENT_TYPE_VHCA_STATE_CHANGE";
118	case MLX5_EVENT_TYPE_PAGE_REQUEST:
119		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
120	case MLX5_EVENT_TYPE_PAGE_FAULT:
121		return "MLX5_EVENT_TYPE_PAGE_FAULT";
122	case MLX5_EVENT_TYPE_PPS_EVENT:
123		return "MLX5_EVENT_TYPE_PPS_EVENT";
124	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
125		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
126	case MLX5_EVENT_TYPE_FPGA_ERROR:
127		return "MLX5_EVENT_TYPE_FPGA_ERROR";
128	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
129		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
130	case MLX5_EVENT_TYPE_GENERAL_EVENT:
131		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
132	case MLX5_EVENT_TYPE_MONITOR_COUNTER:
133		return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
134	case MLX5_EVENT_TYPE_DEVICE_TRACER:
135		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
136	case MLX5_EVENT_TYPE_OBJECT_CHANGE:
137		return "MLX5_EVENT_TYPE_OBJECT_CHANGE";
138	default:
139		return "Unrecognized event";
140	}
141}
142
143/* handles all FW events, type == eqe->type */
144static int any_notifier(struct notifier_block *nb,
145			unsigned long type, void *data)
146{
147	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
148	struct mlx5_events   *events   = event_nb->ctx;
149	struct mlx5_eqe      *eqe      = data;
150
151	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
152		      eqe_type_str(eqe->type), eqe->sub_type);
153	return NOTIFY_OK;
154}
155
156/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
157static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
158{
159	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
160	struct mlx5_events   *events   = event_nb->ctx;
161	struct mlx5_eqe      *eqe      = data;
162	u64 value_lsb;
163	u64 value_msb;
164
165	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
166	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
167
168	mlx5_core_warn(events->dev,
169		       "High temperature on sensors with bit set %llx %llx",
170		       value_msb, value_lsb);
171
172	return NOTIFY_OK;
173}
174
175/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
176static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
177{
178	switch (status) {
179	case MLX5_MODULE_STATUS_PLUGGED:
180		return "Cable plugged";
181	case MLX5_MODULE_STATUS_UNPLUGGED:
182		return "Cable unplugged";
183	case MLX5_MODULE_STATUS_ERROR:
184		return "Cable error";
185	case MLX5_MODULE_STATUS_DISABLED:
186		return "Cable disabled";
187	default:
188		return "Unknown status";
189	}
190}
191
192static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
193{
194	switch (error) {
195	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
196		return "Power budget exceeded";
197	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
198		return "Long Range for non MLNX cable";
199	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
200		return "Bus stuck (I2C or data shorted)";
201	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
202		return "No EEPROM/retry timeout";
203	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
204		return "Enforce part number list";
205	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
206		return "Unknown identifier";
207	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
208		return "High Temperature";
209	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
210		return "Bad or shorted cable/module";
211	case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
212		return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
213	default:
214		return "Unknown error";
215	}
216}
217
218/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
219static int port_module(struct notifier_block *nb, unsigned long type, void *data)
220{
221	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
222	struct mlx5_events   *events   = event_nb->ctx;
223	struct mlx5_eqe      *eqe      = data;
224
225	enum port_module_event_status_type module_status;
226	enum port_module_event_error_type error_type;
227	struct mlx5_eqe_port_module *module_event_eqe;
228	const char *status_str;
229	u8 module_num;
230
231	module_event_eqe = &eqe->data.port_module;
232	module_status = module_event_eqe->module_status &
233			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
234	error_type = module_event_eqe->error_type &
235		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
236
237	if (module_status < MLX5_MODULE_STATUS_NUM)
238		events->pme_stats.status_counters[module_status]++;
239
240	if (module_status == MLX5_MODULE_STATUS_ERROR)
241		if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
242			events->pme_stats.error_counters[error_type]++;
243
244	if (!printk_ratelimit())
245		return NOTIFY_OK;
246
247	module_num = module_event_eqe->module;
248	status_str = mlx5_pme_status_to_string(module_status);
249	if (module_status == MLX5_MODULE_STATUS_ERROR) {
250		const char *error_str = mlx5_pme_error_to_string(error_type);
251
252		mlx5_core_err(events->dev,
253			      "Port module event[error]: module %u, %s, %s\n",
254			      module_num, status_str, error_str);
255	} else {
256		mlx5_core_info(events->dev,
257			       "Port module event: module %u, %s\n",
258			       module_num, status_str);
259	}
260
261	return NOTIFY_OK;
262}
263
/* Values of the MPEIN register pwr_status field, as decoded by
 * mlx5_pcie_event().
 */
enum {
	/* logged as "PCIe slot power capability was not advertised" */
	MLX5_PCI_POWER_COULD_NOT_BE_READ	= 0x0,
	/* logged as "PCIe slot advertised sufficient power" */
	MLX5_PCI_POWER_SUFFICIENT_REPORTED	= 0x1,
	/* logged as "Detected insufficient power on the PCIe slot" */
	MLX5_PCI_POWER_INSUFFICIENT_REPORTED	= 0x2,
};
269
270static void mlx5_pcie_event(struct work_struct *work)
271{
272	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
273	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
274	struct mlx5_events *events;
275	struct mlx5_core_dev *dev;
276	u8 power_status;
277	u16 pci_power;
278
279	events = container_of(work, struct mlx5_events, pcie_core_work);
280	dev  = events->dev;
281
282	if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
283		return;
284
285	mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
286			     MLX5_REG_MPEIN, 0, 0);
287	power_status = MLX5_GET(mpein_reg, out, pwr_status);
288	pci_power = MLX5_GET(mpein_reg, out, pci_power);
289
290	switch (power_status) {
291	case MLX5_PCI_POWER_COULD_NOT_BE_READ:
292		mlx5_core_info_rl(dev,
293				  "PCIe slot power capability was not advertised.\n");
294		break;
295	case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
296		mlx5_core_warn_rl(dev,
297				  "Detected insufficient power on the PCIe slot (%uW).\n",
298				  pci_power);
299		break;
300	case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
301		mlx5_core_info_rl(dev,
302				  "PCIe slot advertised sufficient power (%uW).\n",
303				  pci_power);
304		break;
305	}
306}
307
308static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
309{
310	struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
311							struct mlx5_event_nb,
312							nb);
313	struct mlx5_events      *events   = event_nb->ctx;
314	struct mlx5_eqe         *eqe      = data;
315
316	switch (eqe->sub_type) {
317	case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
318			queue_work(events->wq, &events->pcie_core_work);
319		break;
320	default:
321		return NOTIFY_DONE;
322	}
323
324	return NOTIFY_OK;
325}
326
327void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
328{
329	*stats = dev->priv.events->pme_stats;
330}
331
332/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
333static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
334{
335	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
336	struct mlx5_events   *events   = event_nb->ctx;
337	struct mlx5_eqe      *eqe      = data;
338
339	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
340		      eqe_type_str(eqe->type), eqe->sub_type);
341	atomic_notifier_call_chain(&events->fw_nh, event, data);
342	return NOTIFY_OK;
343}
344
345int mlx5_events_init(struct mlx5_core_dev *dev)
346{
347	struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);
348
349	if (!events)
350		return -ENOMEM;
351
352	ATOMIC_INIT_NOTIFIER_HEAD(&events->fw_nh);
353	events->dev = dev;
354	dev->priv.events = events;
355	events->wq = create_singlethread_workqueue("mlx5_events");
356	if (!events->wq) {
357		kfree(events);
358		return -ENOMEM;
359	}
360	INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);
361	BLOCKING_INIT_NOTIFIER_HEAD(&events->sw_nh);
362
363	return 0;
364}
365
366void mlx5_events_cleanup(struct mlx5_core_dev *dev)
367{
368	destroy_workqueue(dev->priv.events->wq);
369	kvfree(dev->priv.events);
370}
371
372void mlx5_events_start(struct mlx5_core_dev *dev)
373{
374	struct mlx5_events *events = dev->priv.events;
375	int i;
376
377	for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
378		events->notifiers[i].nb  = events_nbs_ref[i];
379		events->notifiers[i].ctx = events;
380		mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
381	}
382}
383
384void mlx5_events_stop(struct mlx5_core_dev *dev)
385{
386	struct mlx5_events *events = dev->priv.events;
387	int i;
388
389	for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0 ; i--)
390		mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
391	flush_workqueue(events->wq);
392}
393
394/* This API is used only for processing and forwarding firmware
395 * events to mlx5 consumer.
396 */
397int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
398{
399	struct mlx5_events *events = dev->priv.events;
400
401	return atomic_notifier_chain_register(&events->fw_nh, nb);
402}
403EXPORT_SYMBOL(mlx5_notifier_register);
404
405int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
406{
407	struct mlx5_events *events = dev->priv.events;
408
409	return atomic_notifier_chain_unregister(&events->fw_nh, nb);
410}
411EXPORT_SYMBOL(mlx5_notifier_unregister);
412
413int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
414{
415	return atomic_notifier_call_chain(&events->fw_nh, event, data);
416}
417
418/* This API is used only for processing and forwarding driver-specific
419 * events to mlx5 consumers.
420 */
421int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
422{
423	struct mlx5_events *events = dev->priv.events;
424
425	return blocking_notifier_chain_register(&events->sw_nh, nb);
426}
427EXPORT_SYMBOL(mlx5_blocking_notifier_register);
428
429int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
430{
431	struct mlx5_events *events = dev->priv.events;
432
433	return blocking_notifier_chain_unregister(&events->sw_nh, nb);
434}
435EXPORT_SYMBOL(mlx5_blocking_notifier_unregister);
436
437int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event,
438				      void *data)
439{
440	struct mlx5_events *events = dev->priv.events;
441
442	return blocking_notifier_call_chain(&events->sw_nh, event, data);
443}
444