1// SPDX-License-Identifier: GPL-2.0
2// Copyright (c) 2019 Mellanox Technologies.
3
4#include "health.h"
5#include "lib/eq.h"
6#include "lib/mlx5.h"
7
/*
 * Open a named object in @fmsg: start a pair nest keyed by @name and then
 * start an object nest inside it.
 *
 * Must be balanced by mlx5e_health_fmsg_named_obj_nest_end().
 */
void mlx5e_health_fmsg_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
	/* Outer level: the "name": ... pair. */
	devlink_fmsg_pair_nest_start(fmsg, name);
	/* Inner level: the object that becomes the value of that pair. */
	devlink_fmsg_obj_nest_start(fmsg);
}
13
/*
 * Close a named object previously opened with
 * mlx5e_health_fmsg_named_obj_nest_start(): the object nest is ended first,
 * then the enclosing pair nest.
 */
void mlx5e_health_fmsg_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
	/* Unwind in the reverse order of the start helper. */
	devlink_fmsg_obj_nest_end(fmsg);
	devlink_fmsg_pair_nest_end(fmsg);
}
19
20void mlx5e_health_cq_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
21{
22	u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
23	u8 hw_status;
24	void *cqc;
25
26	mlx5_core_query_cq(cq->mdev, &cq->mcq, out);
27	cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
28	hw_status = MLX5_GET(cqc, cqc, status);
29
30	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
31	devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
32	devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
33	devlink_fmsg_u32_pair_put(fmsg, "ci", mlx5_cqwq_get_ci(&cq->wq));
34	devlink_fmsg_u32_pair_put(fmsg, "size", mlx5_cqwq_get_size(&cq->wq));
35	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
36}
37
38void mlx5e_health_cq_common_diag_fmsg(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
39{
40	u8 cq_log_stride;
41	u32 cq_sz;
42
43	cq_sz = mlx5_cqwq_get_size(&cq->wq);
44	cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
45
46	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "CQ");
47	devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
48	devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
49	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
50}
51
52void mlx5e_health_eq_diag_fmsg(struct mlx5_eq_comp *eq, struct devlink_fmsg *fmsg)
53{
54	mlx5e_health_fmsg_named_obj_nest_start(fmsg, "EQ");
55	devlink_fmsg_u8_pair_put(fmsg, "eqn", eq->core.eqn);
56	devlink_fmsg_u32_pair_put(fmsg, "irqn", eq->core.irqn);
57	devlink_fmsg_u32_pair_put(fmsg, "vecidx", eq->core.vecidx);
58	devlink_fmsg_u32_pair_put(fmsg, "ci", eq->core.cons_index);
59	devlink_fmsg_u32_pair_put(fmsg, "size", eq_get_size(&eq->core));
60	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
61}
62
/*
 * Create the health reporters of @priv: the TX reporter first, then the RX
 * reporter.
 */
void mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_tx_create(priv);
	mlx5e_reporter_rx_create(priv);
}
68
/*
 * Destroy the health reporters of @priv in the reverse order of
 * mlx5e_health_create_reporters(): RX first, then TX.
 */
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_rx_destroy(priv);
	mlx5e_reporter_tx_destroy(priv);
}
74
75void mlx5e_health_channels_update(struct mlx5e_priv *priv)
76{
77	if (priv->tx_reporter)
78		devlink_health_reporter_state_update(priv->tx_reporter,
79						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
80	if (priv->rx_reporter)
81		devlink_health_reporter_state_update(priv->rx_reporter,
82						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
83}
84
85int mlx5e_health_sq_to_ready(struct mlx5_core_dev *mdev, struct net_device *dev, u32 sqn)
86{
87	struct mlx5e_modify_sq_param msp = {};
88	int err;
89
90	msp.curr_state = MLX5_SQC_STATE_ERR;
91	msp.next_state = MLX5_SQC_STATE_RST;
92
93	err = mlx5e_modify_sq(mdev, sqn, &msp);
94	if (err) {
95		netdev_err(dev, "Failed to move sq 0x%x to reset\n", sqn);
96		return err;
97	}
98
99	memset(&msp, 0, sizeof(msp));
100	msp.curr_state = MLX5_SQC_STATE_RST;
101	msp.next_state = MLX5_SQC_STATE_RDY;
102
103	err = mlx5e_modify_sq(mdev, sqn, &msp);
104	if (err) {
105		netdev_err(dev, "Failed to move sq 0x%x to ready\n", sqn);
106		return err;
107	}
108
109	return 0;
110}
111
112int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
113{
114	int err = 0;
115
116	rtnl_lock();
117	mutex_lock(&priv->state_lock);
118
119	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
120		goto out;
121
122	err = mlx5e_safe_reopen_channels(priv);
123
124out:
125	mutex_unlock(&priv->state_lock);
126	rtnl_unlock();
127
128	return err;
129}
130
131int mlx5e_health_channel_eq_recover(struct net_device *dev, struct mlx5_eq_comp *eq,
132				    struct mlx5e_ch_stats *stats)
133{
134	u32 eqe_count;
135
136	netdev_err(dev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
137		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
138
139	eqe_count = mlx5_eq_poll_irq_disabled(eq);
140	if (!eqe_count)
141		return -EIO;
142
143	netdev_err(dev, "Recovered %d eqes on EQ 0x%x\n",
144		   eqe_count, eq->core.eqn);
145
146	stats->eq_rearm++;
147	return 0;
148}
149
150int mlx5e_health_report(struct mlx5e_priv *priv,
151			struct devlink_health_reporter *reporter, char *err_str,
152			struct mlx5e_err_ctx *err_ctx)
153{
154	netdev_err(priv->netdev, "%s\n", err_str);
155
156	if (!reporter)
157		return err_ctx->recover(err_ctx->ctx);
158
159	return devlink_health_report(reporter, err_str, err_ctx);
160}
161
162#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
163static void mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
164					 const void *value, u32 value_len)
165
166{
167	u32 data_size;
168	u32 offset;
169
170	for (offset = 0; offset < value_len; offset += data_size) {
171		data_size = value_len - offset;
172		if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
173			data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
174		devlink_fmsg_binary_put(fmsg, value + offset, data_size);
175	}
176}
177
178int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
179			       struct devlink_fmsg *fmsg)
180{
181	struct mlx5_core_dev *mdev = priv->mdev;
182	struct mlx5_rsc_dump_cmd *cmd;
183	int cmd_err, err = 0;
184	struct page *page;
185	int size;
186
187	if (IS_ERR_OR_NULL(mdev->rsc_dump))
188		return -EOPNOTSUPP;
189
190	page = alloc_page(GFP_KERNEL);
191	if (!page)
192		return -ENOMEM;
193
194	devlink_fmsg_binary_pair_nest_start(fmsg, "data");
195
196	cmd = mlx5_rsc_dump_cmd_create(mdev, key);
197	if (IS_ERR(cmd)) {
198		err = PTR_ERR(cmd);
199		goto free_page;
200	}
201
202	do {
203		cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
204		if (cmd_err < 0) {
205			err = cmd_err;
206			goto destroy_cmd;
207		}
208
209		mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
210	} while (cmd_err > 0);
211
212destroy_cmd:
213	mlx5_rsc_dump_cmd_destroy(cmd);
214	devlink_fmsg_binary_pair_nest_end(fmsg);
215free_page:
216	__free_page(page);
217	return err;
218}
219
220void mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
221			     int queue_idx, char *lbl)
222{
223	struct mlx5_rsc_key key = {};
224
225	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
226	key.index1 = queue_idx;
227	key.size = PAGE_SIZE;
228	key.num_of_obj1 = 1;
229
230	devlink_fmsg_obj_nest_start(fmsg);
231	mlx5e_health_fmsg_named_obj_nest_start(fmsg, lbl);
232	devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
233	mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
234	mlx5e_health_fmsg_named_obj_nest_end(fmsg);
235	devlink_fmsg_obj_nest_end(fmsg);
236}
237