1/*
2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/etherdevice.h>
34#include <linux/mlx5/driver.h>
35
36#include "mlx5_core.h"
37#include "lib/mlx5.h"
38#include "lib/eq.h"
39#include "fpga/core.h"
40#include "fpga/conn.h"
41
/* Human-readable names for FPGA error event syndromes, indexed by syndrome */
static const char * const mlx5_fpga_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Corrupted DDR",
	[2] = "Flash Timeout",
	[3] = "Internal Link Error",
	[4] = "Watchdog HW Failure",
	[5] = "I2C Failure",
	[6] = "Image Changed",
	[7] = "Temperature Critical",
};
52
/* Human-readable names for FPGA QP error event syndromes, indexed by syndrome */
static const char * const mlx5_fpga_qp_error_strings[] = {
	[0] = "Null Syndrome",
	[1] = "Retry Counter Expired",
	[2] = "RNR Expired",
};

58static struct mlx5_fpga_device *mlx5_fpga_device_alloc(void)
59{
60	struct mlx5_fpga_device *fdev;
61
62	fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
63	if (!fdev)
64		return NULL;
65
66	spin_lock_init(&fdev->state_lock);
67	fdev->state = MLX5_FPGA_STATUS_NONE;
68	return fdev;
69}
70
71static const char *mlx5_fpga_image_name(enum mlx5_fpga_image image)
72{
73	switch (image) {
74	case MLX5_FPGA_IMAGE_USER:
75		return "user";
76	case MLX5_FPGA_IMAGE_FACTORY:
77		return "factory";
78	default:
79		return "unknown";
80	}
81}
82
83static const char *mlx5_fpga_name(u32 fpga_id)
84{
85	static char ret[32];
86
87	switch (fpga_id) {
88	case MLX5_FPGA_NEWTON:
89		return "Newton";
90	case MLX5_FPGA_EDISON:
91		return "Edison";
92	case MLX5_FPGA_MORSE:
93		return "Morse";
94	case MLX5_FPGA_MORSEQ:
95		return "MorseQ";
96	}
97
98	snprintf(ret, sizeof(ret), "Unknown %d", fpga_id);
99	return ret;
100}
101
102static int mlx5_is_fpga_lookaside(u32 fpga_id)
103{
104	return fpga_id != MLX5_FPGA_NEWTON && fpga_id != MLX5_FPGA_EDISON;
105}
106
107static int mlx5_fpga_device_load_check(struct mlx5_fpga_device *fdev)
108{
109	struct mlx5_fpga_query query;
110	int err;
111
112	err = mlx5_fpga_query(fdev->mdev, &query);
113	if (err) {
114		mlx5_fpga_err(fdev, "Failed to query status: %d\n", err);
115		return err;
116	}
117
118	fdev->last_admin_image = query.admin_image;
119	fdev->last_oper_image = query.oper_image;
120
121	mlx5_fpga_info(fdev, "Status %u; Admin image %u; Oper image %u\n",
122		       query.status, query.admin_image, query.oper_image);
123
124	/* for FPGA lookaside projects FPGA load status is not important */
125	if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
126		return 0;
127
128	if (query.status != MLX5_FPGA_STATUS_SUCCESS) {
129		mlx5_fpga_err(fdev, "%s image failed to load; status %u\n",
130			      mlx5_fpga_image_name(fdev->last_oper_image),
131			      query.status);
132		return -EIO;
133	}
134
135	return 0;
136}
137
138static int mlx5_fpga_device_brb(struct mlx5_fpga_device *fdev)
139{
140	int err;
141	struct mlx5_core_dev *mdev = fdev->mdev;
142
143	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
144	if (err) {
145		mlx5_fpga_err(fdev, "Failed to set bypass on: %d\n", err);
146		return err;
147	}
148	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_RESET_SANDBOX);
149	if (err) {
150		mlx5_fpga_err(fdev, "Failed to reset SBU: %d\n", err);
151		return err;
152	}
153	err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_OFF);
154	if (err) {
155		mlx5_fpga_err(fdev, "Failed to set bypass off: %d\n", err);
156		return err;
157	}
158	return 0;
159}
160
/* Forward declaration: the notifier callbacks below dispatch to this handler */
static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
			   unsigned long event, void *eqe);
162
163static int fpga_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
164{
165	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_err_nb);
166
167	return mlx5_fpga_event(fdev, event, eqe);
168}
169
170static int fpga_qp_err_event(struct notifier_block *nb, unsigned long event, void *eqe)
171{
172	struct mlx5_fpga_device *fdev = mlx5_nb_cof(nb, struct mlx5_fpga_device, fpga_qp_err_nb);
173
174	return mlx5_fpga_event(fdev, event, eqe);
175}
176
177int mlx5_fpga_device_start(struct mlx5_core_dev *mdev)
178{
179	struct mlx5_fpga_device *fdev = mdev->fpga;
180	unsigned int max_num_qps;
181	unsigned long flags;
182	u32 fpga_id;
183	int err;
184
185	if (!fdev)
186		return 0;
187
188	err = mlx5_fpga_caps(fdev->mdev);
189	if (err)
190		goto out;
191
192	err = mlx5_fpga_device_load_check(fdev);
193	if (err)
194		goto out;
195
196	fpga_id = MLX5_CAP_FPGA(fdev->mdev, fpga_id);
197	mlx5_fpga_info(fdev, "FPGA card %s:%u\n", mlx5_fpga_name(fpga_id), fpga_id);
198
199	/* No QPs if FPGA does not participate in net processing */
200	if (mlx5_is_fpga_lookaside(fpga_id))
201		goto out;
202
203	mlx5_fpga_info(fdev, "%s(%d): image, version %u; SBU %06x:%04x version %d\n",
204		       mlx5_fpga_image_name(fdev->last_oper_image),
205		       fdev->last_oper_image,
206		       MLX5_CAP_FPGA(fdev->mdev, image_version),
207		       MLX5_CAP_FPGA(fdev->mdev, ieee_vendor_id),
208		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_id),
209		       MLX5_CAP_FPGA(fdev->mdev, sandbox_product_version));
210
211	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
212	if (!max_num_qps) {
213		mlx5_fpga_err(fdev, "FPGA reports 0 QPs in SHELL_CAPS\n");
214		err = -ENOTSUPP;
215		goto out;
216	}
217
218	err = mlx5_core_reserve_gids(mdev, max_num_qps);
219	if (err)
220		goto out;
221
222	MLX5_NB_INIT(&fdev->fpga_err_nb, fpga_err_event, FPGA_ERROR);
223	MLX5_NB_INIT(&fdev->fpga_qp_err_nb, fpga_qp_err_event, FPGA_QP_ERROR);
224	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_err_nb);
225	mlx5_eq_notifier_register(fdev->mdev, &fdev->fpga_qp_err_nb);
226
227	err = mlx5_fpga_conn_device_init(fdev);
228	if (err)
229		goto err_rsvd_gid;
230
231	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
232		err = mlx5_fpga_device_brb(fdev);
233		if (err)
234			goto err_conn_init;
235	}
236
237	goto out;
238
239err_conn_init:
240	mlx5_fpga_conn_device_cleanup(fdev);
241
242err_rsvd_gid:
243	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
244	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
245	mlx5_core_unreserve_gids(mdev, max_num_qps);
246out:
247	spin_lock_irqsave(&fdev->state_lock, flags);
248	fdev->state = err ? MLX5_FPGA_STATUS_FAILURE : MLX5_FPGA_STATUS_SUCCESS;
249	spin_unlock_irqrestore(&fdev->state_lock, flags);
250	return err;
251}
252
253int mlx5_fpga_init(struct mlx5_core_dev *mdev)
254{
255	struct mlx5_fpga_device *fdev;
256
257	if (!MLX5_CAP_GEN(mdev, fpga)) {
258		mlx5_core_dbg(mdev, "FPGA capability not present\n");
259		return 0;
260	}
261
262	mlx5_core_dbg(mdev, "Initializing FPGA\n");
263
264	fdev = mlx5_fpga_device_alloc();
265	if (!fdev)
266		return -ENOMEM;
267
268	fdev->mdev = mdev;
269	mdev->fpga = fdev;
270
271	return 0;
272}
273
274void mlx5_fpga_device_stop(struct mlx5_core_dev *mdev)
275{
276	struct mlx5_fpga_device *fdev = mdev->fpga;
277	unsigned int max_num_qps;
278	unsigned long flags;
279	int err;
280
281	if (!fdev)
282		return;
283
284	if (mlx5_is_fpga_lookaside(MLX5_CAP_FPGA(fdev->mdev, fpga_id)))
285		return;
286
287	spin_lock_irqsave(&fdev->state_lock, flags);
288	if (fdev->state != MLX5_FPGA_STATUS_SUCCESS) {
289		spin_unlock_irqrestore(&fdev->state_lock, flags);
290		return;
291	}
292	fdev->state = MLX5_FPGA_STATUS_NONE;
293	spin_unlock_irqrestore(&fdev->state_lock, flags);
294
295	if (fdev->last_oper_image == MLX5_FPGA_IMAGE_USER) {
296		err = mlx5_fpga_ctrl_op(mdev, MLX5_FPGA_CTRL_OPERATION_SANDBOX_BYPASS_ON);
297		if (err)
298			mlx5_fpga_err(fdev, "Failed to re-set SBU bypass on: %d\n",
299				      err);
300	}
301
302	mlx5_fpga_conn_device_cleanup(fdev);
303	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_err_nb);
304	mlx5_eq_notifier_unregister(fdev->mdev, &fdev->fpga_qp_err_nb);
305
306	max_num_qps = MLX5_CAP_FPGA(mdev, shell_caps.max_num_qps);
307	mlx5_core_unreserve_gids(mdev, max_num_qps);
308}
309
310void mlx5_fpga_cleanup(struct mlx5_core_dev *mdev)
311{
312	struct mlx5_fpga_device *fdev = mdev->fpga;
313
314	mlx5_fpga_device_stop(mdev);
315	kfree(fdev);
316	mdev->fpga = NULL;
317}
318
319static const char *mlx5_fpga_syndrome_to_string(u8 syndrome)
320{
321	if (syndrome < ARRAY_SIZE(mlx5_fpga_error_strings))
322		return mlx5_fpga_error_strings[syndrome];
323	return "Unknown";
324}
325
326static const char *mlx5_fpga_qp_syndrome_to_string(u8 syndrome)
327{
328	if (syndrome < ARRAY_SIZE(mlx5_fpga_qp_error_strings))
329		return mlx5_fpga_qp_error_strings[syndrome];
330	return "Unknown";
331}
332
333static int mlx5_fpga_event(struct mlx5_fpga_device *fdev,
334			   unsigned long event, void *eqe)
335{
336	void *data = ((struct mlx5_eqe *)eqe)->data.raw;
337	const char *event_name;
338	bool teardown = false;
339	unsigned long flags;
340	u8 syndrome;
341
342	switch (event) {
343	case MLX5_EVENT_TYPE_FPGA_ERROR:
344		syndrome = MLX5_GET(fpga_error_event, data, syndrome);
345		event_name = mlx5_fpga_syndrome_to_string(syndrome);
346		break;
347	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
348		syndrome = MLX5_GET(fpga_qp_error_event, data, syndrome);
349		event_name = mlx5_fpga_qp_syndrome_to_string(syndrome);
350		break;
351	default:
352		return NOTIFY_DONE;
353	}
354
355	spin_lock_irqsave(&fdev->state_lock, flags);
356	switch (fdev->state) {
357	case MLX5_FPGA_STATUS_SUCCESS:
358		mlx5_fpga_warn(fdev, "Error %u: %s\n", syndrome, event_name);
359		teardown = true;
360		break;
361	default:
362		mlx5_fpga_warn_ratelimited(fdev, "Unexpected error event %u: %s\n",
363					   syndrome, event_name);
364	}
365	spin_unlock_irqrestore(&fdev->state_lock, flags);
366	/* We tear-down the card's interfaces and functionality because
367	 * the FPGA bump-on-the-wire is misbehaving and we lose ability
368	 * to communicate with the network. User may still be able to
369	 * recover by re-programming or debugging the FPGA
370	 */
371	if (teardown)
372		mlx5_trigger_health_work(fdev->mdev);
373
374	return NOTIFY_OK;
375}
376