mlx5_main.c revision 341958
/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c 341958 2018-12-12 12:46:12Z hselasky $
 */

#define	LINUXKPI_PARAM_PREFIX mlx5_

#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/interrupt.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/cq.h>
#include <dev/mlx5/qp.h>
#include <dev/mlx5/srq.h>
#include <linux/delay.h>
#include <dev/mlx5/mlx5_ifc.h>
#include <dev/mlx5/mlx5_fpga/core.h>
#include <dev/mlx5/mlx5_lib/mlx5.h>
#include "mlx5_core.h"
#include "fs_core.h"

static const char mlx5_version[] = "Mellanox Core driver "
	DRIVER_VERSION " (" DRIVER_RELDATE ")";
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
MODULE_LICENSE("Dual BSD/GPL");
#if (__FreeBSD_version >= 1100000)
MODULE_DEPEND(mlx5, linuxkpi, 1, 1, 1);
#endif
MODULE_VERSION(mlx5, 1);

int mlx5_core_debug_mask;
module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644);
MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

#define MLX5_DEFAULT_PROF	2
static int prof_sel = MLX5_DEFAULT_PROF;
module_param_named(prof_sel, prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 3");

SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "mlx5 HW controls");

#define NUMA_NO_NODE       -1

static LIST_HEAD(intf_list);
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(intf_mutex);

struct mlx5_device_context {
	struct list_head	list;
	struct mlx5_interface  *intf;
	void		       *context;
};

enum {
	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

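/*
 * Resource profiles, selected at load time through the "prof_sel"
 * module parameter. Profile 0 keeps the firmware defaults, profiles
 * 1 and 3 only bound the QP table size, and profile 2 (the default)
 * additionally pre-sizes the MR cache buckets.
 */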
static struct mlx5_profile profiles[] = {
	[0] = {
		.mask           = 0,
	},
	[1] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 12,
	},
	[2] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE |
				  MLX5_PROF_MASK_MR_CACHE,
		.log_max_qp	= 17,
		.mr_cache[0]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[1]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[2]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[3]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[4]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[5]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[6]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[7]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[8]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[9]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[10]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[11]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[12]	= {
			.size	= 64,
			.limit	= 32
		},
		.mr_cache[13]	= {
			.size	= 32,
			.limit	= 16
		},
		.mr_cache[14]	= {
			.size	= 16,
			.limit	= 8
		},
	},
	[3] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 17,
	},
};

static int set_dma_caps(struct pci_dev *pdev)
{
	int err;

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		device_printf((&pdev->dev)->bsddev, "WARN: ""Warning: couldn't set 64-bit PCI DMA mask\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			device_printf((&pdev->dev)->bsddev, "ERR: ""Can't set PCI DMA mask, aborting\n");
			return err;
		}
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		device_printf((&pdev->dev)->bsddev, "WARN: ""Warning: couldn't set 64-bit consistent PCI DMA mask\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			device_printf((&pdev->dev)->bsddev, "ERR: ""Can't set consistent PCI DMA mask, aborting\n");
			return err;
		}
	}

	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
	return err;
}

static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int err = 0;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
		err = pci_enable_device(pdev);
		if (!err)
			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);

	return err;
}

static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
		pci_disable_device(pdev);
		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);
}

static int request_bar(struct pci_dev *pdev)
{
	int err = 0;

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Missing registers BAR, aborting\n");
		return -ENODEV;
	}

	err = pci_request_regions(pdev, DRIVER_NAME);
	if (err)
		device_printf((&pdev->dev)->bsddev, "ERR: ""Couldn't get PCI resources, aborting\n");

	return err;
}

static void release_bar(struct pci_dev *pdev)
{
	pci_release_regions(pdev);
}

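/*
 * Request one MSI-X vector per completion EQ plus the control vectors
 * (MLX5_EQ_VEC_COMP_BASE). The completion count defaults to the number
 * of ports times the number of online CPUs, may be capped through the
 * "msix_eqvec" sysctl, and is always bounded by the number of EQs the
 * device exposes (log_max_eq).
 */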
static int mlx5_enable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_eq_table *table = &priv->eq_table;
	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
	int limit = dev->msix_eqvec;
	int nvec = MLX5_EQ_VEC_COMP_BASE;
	int i;

	if (limit > 0)
		nvec += limit;
	else
		nvec += MLX5_CAP_GEN(dev, num_ports) * num_online_cpus();

	nvec = min_t(int, nvec, num_eqs);
	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
		return -ENOMEM;

	priv->msix_arr = kzalloc(nvec * sizeof(*priv->msix_arr), GFP_KERNEL);
	priv->irq_info = kzalloc(nvec * sizeof(*priv->irq_info), GFP_KERNEL);
	if (priv->msix_arr == NULL || priv->irq_info == NULL) {
		kfree(priv->irq_info);
		kfree(priv->msix_arr);
		return -ENOMEM;
	}

	for (i = 0; i < nvec; i++)
		priv->msix_arr[i].entry = i;

	nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
				     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
	if (nvec < 0) {
		kfree(priv->irq_info);
		kfree(priv->msix_arr);
		return nvec;
	}

	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;

	return 0;
}

static void mlx5_disable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;

	pci_disable_msix(dev->pdev);
	kfree(priv->irq_info);
	kfree(priv->msix_arr);
}

struct mlx5_reg_host_endianess {
	u8	he;
	u8      rsvd[15];
};

#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

enum {
	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
				MLX5_DEV_CAP_FLAG_DCT |
				MLX5_DEV_CAP_FLAG_DRAIN_SIGERR,
};

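/*
 * Translate a pkey table size in entries into the firmware encoding,
 * which is log2(size / 128); e.g. 128 maps to 0 and 4096 maps to 5.
 */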
static u16 to_fw_pkey_sz(u32 size)
{
	switch (size) {
	case 128:
		return 0;
	case 256:
		return 1;
	case 512:
		return 2;
	case 1024:
		return 3;
	case 2048:
		return 4;
	case 4096:
		return 5;
	default:
		printf("mlx5_core: WARN: ""invalid pkey table size %d\n", size);
		return 0;
	}
}

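/*
 * QUERY_HCA_CAP: op_mod is (cap_type << 1) | cap_mode, selecting both
 * the capability group to query and whether the current or the maximum
 * supported values are returned.
 */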
static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
				   enum mlx5_cap_type cap_type,
				   enum mlx5_cap_mode cap_mode)
{
	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *out, *hca_caps;
	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
	int err;

	memset(in, 0, sizeof(in));
	out = kzalloc(out_sz, GFP_KERNEL);
	if (out == NULL)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
	if (err) {
		mlx5_core_warn(dev,
			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
			       cap_type, cap_mode, err);
		goto query_ex;
	}

	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	switch (cap_mode) {
	case HCA_CAP_OPMOD_GET_MAX:
		memcpy(dev->hca_caps_max[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	case HCA_CAP_OPMOD_GET_CUR:
		memcpy(dev->hca_caps_cur[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	default:
		mlx5_core_warn(dev,
			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
			       cap_type, cap_mode);
		err = -EINVAL;
		break;
	}
query_ex:
	kfree(out);
	return err;
}

int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
{
	int ret;

	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
	if (ret)
		return ret;

	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}

static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
{
	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};

	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);

	return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
}

static int handle_hca_cap(struct mlx5_core_dev *dev)
{
	void *set_ctx = NULL;
	struct mlx5_profile *prof = dev->profile;
	int err;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *set_hca_cap;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);
	if (set_ctx == NULL)
		return -ENOMEM;

	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
	if (err)
		goto query_ex;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
				   capability);
	memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
	       MLX5_ST_SZ_BYTES(cmd_hca_cap));

	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
		      128);
	/* we limit the size of the pkey table to 128 entries for now */
	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
		 to_fw_pkey_sz(128));

	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
			 prof->log_max_qp);

	/* disable cmdif checksum */
	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

	/* enable drain sigerr */
	MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1);

	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

	err = set_caps(dev, set_ctx, set_sz);

query_ex:
	kfree(set_ctx);
	return err;
}

static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
{
	void *set_ctx;
	void *set_hca_cap;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	int req_endianness;
	int err;

	if (MLX5_CAP_GEN(dev, atomic)) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
		if (err)
			return err;
	} else {
		return 0;
	}

	req_endianness =
		MLX5_CAP_ATOMIC(dev,
				supported_atomic_req_8B_endianess_mode_1);

	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
		return 0;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);
	if (!set_ctx)
		return -ENOMEM;

	MLX5_SET(set_hca_cap_in, set_ctx, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_ATOMIC << 1);
	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);

	/* Set requestor to host endianness */
	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);

	err = set_caps(dev, set_ctx, set_sz);

	kfree(set_ctx);
	return err;
}

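/*
 * Report the host endianness to firmware through the HOST_ENDIANNESS
 * access register, used for 8-byte atomic operations. Ethernet-only
 * devices without RoCE are skipped, since they do not use atomics.
 */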
static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
	struct mlx5_reg_host_endianess he_in;
	struct mlx5_reg_host_endianess he_out;
	int err;

	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
	    !MLX5_CAP_GEN(dev, roce))
		return 0;

	memset(&he_in, 0, sizeof(he_in));
	he_in.he = MLX5_SET_HOST_ENDIANNESS;
	err = mlx5_core_access_reg(dev, &he_in, sizeof(he_in),
				   &he_out, sizeof(he_out),
				   MLX5_REG_HOST_ENDIANNESS, 0, 1);
	return err;
}

static int mlx5_core_enable_hca(struct mlx5_core_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};

	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};

	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

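/*
 * Negotiate the ISSI (Interface Step Sequence ID), the revision of the
 * device/driver command interface. Firmware that predates QUERY_ISSI
 * answers with BAD_OP and only ISSI 0 is used; otherwise ISSI 1 is
 * selected when the firmware supports it.
 */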
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
	u32 sup_issi;
	int err;

	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);

	err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), query_out, sizeof(query_out));
	if (err) {
		u32 syndrome;
		u8 status;

		mlx5_cmd_mbox_status(query_out, &status, &syndrome);
		if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
			pr_debug("Only ISSI 0 is supported\n");
			return 0;
		}

		printf("mlx5_core: ERR: ""failed to query ISSI\n");
		return err;
	}

	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);

	if (sup_issi & (1 << 1)) {
		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]	 = {0};
		u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};

		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
		MLX5_SET(set_issi_in, set_in, current_issi, 1);

		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out));
		if (err) {
			printf("mlx5_core: ERR: ""failed to set ISSI=1 err(%d)\n", err);
			return err;
		}

		dev->issi = 1;

		return 0;
	} else if (sup_issi & (1 << 0)) {
		return 0;
	}

	return -ENOTSUPP;
}

int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq;
	int err = -ENOENT;

	spin_lock(&table->lock);
	list_for_each_entry(eq, &table->comp_eqs_list, list) {
		if (eq->index == vector) {
			*eqn = eq->eqn;
			*irqn = eq->irqn;
			err = 0;
			break;
		}
	}
	spin_unlock(&table->lock);

	return err;
}
EXPORT_SYMBOL(mlx5_vector2eqn);

int mlx5_rename_eq(struct mlx5_core_dev *dev, int eq_ix, char *name)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_eq_table *table = &priv->eq_table;
	struct mlx5_eq *eq;
	int err = -ENOENT;

	spin_lock(&table->lock);
	list_for_each_entry(eq, &table->comp_eqs_list, list) {
		if (eq->index == eq_ix) {
			int irq_ix = eq_ix + MLX5_EQ_VEC_COMP_BASE;

			snprintf(priv->irq_info[irq_ix].name, MLX5_MAX_IRQ_NAME,
				 "%s-%d", name, eq_ix);

			err = 0;
			break;
		}
	}
	spin_unlock(&table->lock);

	return err;
}

static void free_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq, *n;

	spin_lock(&table->lock);
	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
		list_del(&eq->list);
		spin_unlock(&table->lock);
		if (mlx5_destroy_unmap_eq(dev, eq))
			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
				       eq->eqn);
		kfree(eq);
		spin_lock(&table->lock);
	}
	spin_unlock(&table->lock);
}

static int alloc_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	char name[MLX5_MAX_IRQ_NAME];
	struct mlx5_eq *eq;
	int ncomp_vec;
	int nent;
	int err;
	int i;

	INIT_LIST_HEAD(&table->comp_eqs_list);
	ncomp_vec = table->num_comp_vectors;
	nent = MLX5_COMP_EQ_SIZE;
	for (i = 0; i < ncomp_vec; i++) {
		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
		if (eq == NULL) {
			err = -ENOMEM;
			goto clean;
		}

		snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
		err = mlx5_create_map_eq(dev, eq,
					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
					 name, &dev->priv.uuari.uars[0]);
		if (err) {
			kfree(eq);
			goto clean;
		}
		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
		eq->index = i;
		spin_lock(&table->lock);
		list_add_tail(&eq->list, &table->comp_eqs_list);
		spin_unlock(&table->lock);
	}

	return 0;

clean:
	free_comp_eqs(dev);
	return err;
}

static int map_bf_area(struct mlx5_core_dev *dev)
{
	resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
	resource_size_t bf_len = pci_resource_len(dev->pdev, 0);

	dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);

	return dev->priv.bf_mapping ? 0 : -ENOMEM;
}

static void unmap_bf_area(struct mlx5_core_dev *dev)
{
	if (dev->priv.bf_mapping)
		io_mapping_free(dev->priv.bf_mapping);
}

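/*
 * The most significant bit of the "initializing" word in the HCA
 * initialization segment stays set while firmware is still booting;
 * wait_fw_init() polls it until the bit clears or the timeout expires.
 */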
static inline int fw_initializing(struct mlx5_core_dev *dev)
{
	return ioread32be(&dev->iseg->initializing) >> 31;
}

static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
{
	u64 end = jiffies + msecs_to_jiffies(max_wait_mili);
	int err = 0;

	while (fw_initializing(dev)) {
		if (time_after(jiffies, end)) {
			err = -EBUSY;
			break;
		}
		msleep(FW_INIT_WAIT_MS);
	}

	return err;
}

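/*
 * Each (device, interface) pair is tracked by a mlx5_device_context
 * holding the cookie returned by the interface's add() callback;
 * mlx5_remove_device() hands the same cookie back to remove().
 */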
static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
	if (!dev_ctx)
		return;

	dev_ctx->intf    = intf;
	CURVNET_SET_QUIET(vnet0);
	dev_ctx->context = intf->add(dev);
	CURVNET_RESTORE();

	if (dev_ctx->context) {
		spin_lock_irq(&priv->ctx_lock);
		list_add_tail(&dev_ctx->list, &priv->ctx_list);
		spin_unlock_irq(&priv->ctx_lock);
	} else {
		kfree(dev_ctx);
	}
}

static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf == intf) {
			spin_lock_irq(&priv->ctx_lock);
			list_del(&dev_ctx->list);
			spin_unlock_irq(&priv->ctx_lock);

			intf->remove(dev, dev_ctx->context);
			kfree(dev_ctx);
			return;
		}
}

int
mlx5_register_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_add_tail(&priv->dev_list, &dev_list);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}

void
mlx5_unregister_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_remove_device(intf, priv);
	list_del(&priv->dev_list);
	mutex_unlock(&intf_mutex);
}

int mlx5_register_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	if (!intf->add || !intf->remove)
		return -EINVAL;

	mutex_lock(&intf_mutex);
	list_add_tail(&intf->list, &intf_list);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}
EXPORT_SYMBOL(mlx5_register_interface);

void mlx5_unregister_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	mutex_lock(&intf_mutex);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_remove_device(intf, priv);
	list_del(&intf->list);
	mutex_unlock(&intf_mutex);
}
EXPORT_SYMBOL(mlx5_unregister_interface);

void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
	struct mlx5_priv *priv = &mdev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;
	void *result = NULL;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
		if ((dev_ctx->intf->protocol == protocol) &&
		    dev_ctx->intf->get_dev) {
			result = dev_ctx->intf->get_dev(dev_ctx->context);
			break;
		}

	spin_unlock_irqrestore(&priv->ctx_lock, flags);

	return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);

static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	struct pci_dev *pdev = dev->pdev;
	int err = 0;

	pci_set_drvdata(dev->pdev, dev);
	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;

	mutex_init(&priv->pgdir_mutex);
	INIT_LIST_HEAD(&priv->pgdir_list);
	spin_lock_init(&priv->mkey_lock);

	priv->numa_node = NUMA_NO_NODE;

	err = mlx5_pci_enable_device(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Cannot enable PCI device, aborting\n");
		goto err_dbg;
	}

	err = request_bar(pdev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""error requesting BARs, aborting\n");
		goto err_disable;
	}

	pci_set_master(pdev);

	err = set_dma_caps(pdev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed setting DMA capabilities mask, aborting\n");
		goto err_clr_master;
	}

	dev->iseg_base = pci_resource_start(dev->pdev, 0);
	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
	if (!dev->iseg) {
		err = -ENOMEM;
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed mapping initialization segment, aborting\n");
		goto err_clr_master;
	}

	return 0;

err_clr_master:
	pci_clear_master(dev->pdev);
	release_bar(dev->pdev);
err_disable:
	mlx5_pci_disable_device(dev);
err_dbg:
	return err;
}

static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	iounmap(dev->iseg);
	pci_clear_master(dev->pdev);
	release_bar(dev->pdev);
	mlx5_pci_disable_device(dev);
}

static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	struct pci_dev *pdev = dev->pdev;
	int err;

	err = mlx5_vsc_find_cap(dev);
	if (err)
		dev_err(&pdev->dev, "Unable to find vendor specific capabilities\n");

	err = mlx5_query_hca_caps(dev);
	if (err) {
		dev_err(&pdev->dev, "query hca failed\n");
		goto out;
	}

	err = mlx5_query_board_id(dev);
	if (err) {
		dev_err(&pdev->dev, "query board id failed\n");
		goto out;
	}

	err = mlx5_eq_init(dev);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize eq\n");
		goto out;
	}

	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);

	err = mlx5_init_cq_table(dev);
	if (err) {
		dev_err(&pdev->dev, "failed to initialize cq table\n");
		goto err_eq_cleanup;
	}

	mlx5_init_qp_table(dev);
	mlx5_init_srq_table(dev);
	mlx5_init_mr_table(dev);

	mlx5_init_reserved_gids(dev);
	mlx5_fpga_init(dev);

	return 0;

err_eq_cleanup:
	mlx5_eq_cleanup(dev);

out:
	return err;
}

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
	mlx5_fpga_cleanup(dev);
	mlx5_cleanup_reserved_gids(dev);
	mlx5_cleanup_mr_table(dev);
	mlx5_cleanup_srq_table(dev);
	mlx5_cleanup_qp_table(dev);
	mlx5_cleanup_cq_table(dev);
	mlx5_eq_cleanup(dev);
}

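/*
 * Bring the device up: initialize the command interface, wait for
 * firmware, enable the HCA, negotiate the ISSI, hand out boot and init
 * pages, program capabilities, then start the EQs, flow steering and
 * the FPGA before registering with the interface list. The error
 * labels unwind these steps in reverse order.
 */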
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			 bool boot)
{
	struct pci_dev *pdev = dev->pdev;
	int err;

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		dev_warn(&dev->pdev->dev, "%s: interface is up, NOP\n",
			 __func__);
		goto out;
	}

	device_printf((&pdev->dev)->bsddev, "INFO: ""firmware version: %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));

	/*
	 * On load, remove any previous indication of internal error;
	 * the device is coming up.
	 */
	dev->state = MLX5_DEVICE_STATE_UP;

	err = mlx5_cmd_init(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed initializing command interface, aborting\n");
		goto out_err;
	}

	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI);
	if (err) {
		device_printf((&dev->pdev->dev)->bsddev, "ERR: ""Firmware over %d MS in initializing state, aborting\n", FW_INIT_TIMEOUT_MILI);
		goto err_cmd_cleanup;
	}

	err = mlx5_core_enable_hca(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""enable hca failed\n");
		goto err_cmd_cleanup;
	}

	err = mlx5_core_set_issi(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""failed to set issi\n");
		goto err_disable_hca;
	}

	err = mlx5_pagealloc_start(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""mlx5_pagealloc_start failed\n");
		goto err_disable_hca;
	}

	err = mlx5_satisfy_startup_pages(dev, 1);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""failed to allocate boot pages\n");
		goto err_pagealloc_stop;
	}

	err = set_hca_ctrl(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""set_hca_ctrl failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""handle_hca_cap failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap_atomic(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""handle_hca_cap_atomic failed\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_satisfy_startup_pages(dev, 0);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""failed to allocate init pages\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_cmd_init_hca(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""init hca failed\n");
		goto reclaim_boot_pages;
	}

	mlx5_start_health_poll(dev);

	if (boot) {
		err = mlx5_init_once(dev, priv);
		if (err) {
			dev_err(&pdev->dev, "sw objs init failed\n");
			goto err_stop_poll;
		}
	}

	err = mlx5_enable_msix(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""enable msix failed\n");
		goto err_cleanup_once;
	}

	err = mlx5_alloc_uuars(dev, &priv->uuari);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed allocating uar, aborting\n");
		goto err_disable_msix;
	}

	err = mlx5_start_eqs(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed to start pages and async EQs\n");
		goto err_free_uar;
	}

	err = alloc_comp_eqs(dev);
	if (err) {
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed to alloc completion EQs\n");
		goto err_stop_eqs;
	}

	if (map_bf_area(dev))
		device_printf((&pdev->dev)->bsddev, "ERR: ""Failed to map blue flame area\n");

	err = mlx5_init_fs(dev);
	if (err) {
		mlx5_core_err(dev, "flow steering init %d\n", err);
		goto err_free_comp_eqs;
	}

	err = mlx5_fpga_device_start(dev);
	if (err) {
		dev_err(&pdev->dev, "fpga device start failed %d\n", err);
		goto err_fpga_start;
	}

	err = mlx5_register_device(dev);
	if (err) {
		dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err);
		goto err_fs;
	}

	clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

out:
	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_fs:
	mlx5_fpga_device_stop(dev);

err_fpga_start:
	mlx5_cleanup_fs(dev);

err_free_comp_eqs:
	free_comp_eqs(dev);
	unmap_bf_area(dev);

err_stop_eqs:
	mlx5_stop_eqs(dev);

err_free_uar:
	mlx5_free_uuars(dev, &priv->uuari);

err_disable_msix:
	mlx5_disable_msix(dev);

err_cleanup_once:
	if (boot)
		mlx5_cleanup_once(dev);

err_stop_poll:
	mlx5_stop_health_poll(dev, boot);
	if (mlx5_cmd_teardown_hca(dev)) {
		device_printf((&dev->pdev->dev)->bsddev, "ERR: ""tear_down_hca failed, skip cleanup\n");
		goto out_err;
	}

reclaim_boot_pages:
	mlx5_reclaim_startup_pages(dev);

err_pagealloc_stop:
	mlx5_pagealloc_stop(dev);

err_disable_hca:
	mlx5_core_disable_hca(dev);

err_cmd_cleanup:
	mlx5_cmd_cleanup(dev);

out_err:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);

	return err;
}

static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			   bool cleanup)
{
	int err = 0;

	if (cleanup)
		mlx5_drain_health_recovery(dev);

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) {
		dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__);
		if (cleanup)
			mlx5_cleanup_once(dev);
		goto out;
	}

	mlx5_unregister_device(dev);

	mlx5_fpga_device_stop(dev);
	mlx5_cleanup_fs(dev);
	unmap_bf_area(dev);
	mlx5_wait_for_reclaim_vfs_pages(dev);
	free_comp_eqs(dev);
	mlx5_stop_eqs(dev);
	mlx5_free_uuars(dev, &priv->uuari);
	mlx5_disable_msix(dev);
	if (cleanup)
		mlx5_cleanup_once(dev);
	mlx5_stop_health_poll(dev, cleanup);
	err = mlx5_cmd_teardown_hca(dev);
	if (err) {
		device_printf((&dev->pdev->dev)->bsddev, "ERR: ""tear_down_hca failed, skip cleanup\n");
		goto out;
	}
	mlx5_pagealloc_stop(dev);
	mlx5_reclaim_startup_pages(dev);
	mlx5_core_disable_hca(dev);
	mlx5_cmd_cleanup(dev);

out:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	set_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state);
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
		     unsigned long param)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf->event)
			dev_ctx->intf->event(dev, dev_ctx->context, event, param);

	spin_unlock_irqrestore(&priv->ctx_lock, flags);
}

struct mlx5_core_event_handler {
	void (*event)(struct mlx5_core_dev *dev,
		      enum mlx5_dev_event event,
		      void *data);
};

static int init_one(struct pci_dev *pdev,
		    const struct pci_device_id *id)
{
	struct mlx5_core_dev *dev;
	struct mlx5_priv *priv;
	device_t bsddev = pdev->dev.bsddev;
	int err;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (dev == NULL)
		return -ENOMEM;
	priv = &dev->priv;
	if (id)
		priv->pci_dev_data = id->driver_data;

	if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profiles)) {
		device_printf(bsddev, "WARN: selected profile out of range, selecting default (%d)\n", MLX5_DEFAULT_PROF);
		prof_sel = MLX5_DEFAULT_PROF;
	}
	dev->profile = &profiles[prof_sel];
	dev->pdev = pdev;
	dev->event = mlx5_core_event;

	/* Set desc */
	device_set_desc(bsddev, mlx5_version);

	sysctl_ctx_init(&dev->sysctl_ctx);
	SYSCTL_ADD_INT(&dev->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
	    OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0,
	    "Maximum number of MSIX event queue vectors, if set");

	INIT_LIST_HEAD(&priv->ctx_list);
	spin_lock_init(&priv->ctx_lock);
	mutex_init(&dev->pci_status_mutex);
	mutex_init(&dev->intf_state_mutex);
	err = mlx5_pci_init(dev, priv);
	if (err) {
		device_printf(bsddev, "ERR: mlx5_pci_init failed %d\n", err);
		goto clean_dev;
	}

	err = mlx5_health_init(dev);
	if (err) {
		device_printf(bsddev, "ERR: mlx5_health_init failed %d\n", err);
		goto close_pci;
	}

	mlx5_pagealloc_init(dev);

	err = mlx5_load_one(dev, priv, true);
	if (err) {
		device_printf(bsddev, "ERR: mlx5_load_one failed %d\n", err);
		goto clean_health;
	}

	mlx5_fwdump_prep(dev);

	pci_save_state(bsddev);
	return 0;

clean_health:
	mlx5_pagealloc_cleanup(dev);
	mlx5_health_cleanup(dev);
close_pci:
	mlx5_pci_close(dev, priv);
clean_dev:
	sysctl_ctx_free(&dev->sysctl_ctx);
	kfree(dev);
	return err;
}

static void remove_one(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;

	if (mlx5_unload_one(dev, priv, true)) {
		dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n");
		mlx5_health_cleanup(dev);
		return;
	}

	mlx5_fwdump_clean(dev);
	mlx5_pagealloc_cleanup(dev);
	mlx5_health_cleanup(dev);
	mlx5_pci_close(dev, priv);
	pci_set_drvdata(pdev, NULL);
	sysctl_ctx_free(&dev->sysctl_ctx);
	kfree(dev);
}

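/*
 * PCI error (AER) hooks: on a detected error the device is put into
 * the error state and unloaded, and for a non-transient channel state
 * also disabled; slot_reset re-enables and restores it, and resume()
 * reloads the driver once the device shows vital signs again.
 */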
static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;

	dev_info(&pdev->dev, "%s was called\n", __func__);
	mlx5_enter_error_state(dev, false);
	mlx5_unload_one(dev, priv, false);

	if (state) {
		mlx5_drain_health_wq(dev);
		mlx5_pci_disable_device(dev);
	}

	return state == pci_channel_io_perm_failure ?
		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err = 0;

	dev_info(&pdev->dev, "%s was called\n", __func__);

	err = mlx5_pci_enable_device(dev);
	if (err) {
		dev_err(&pdev->dev, "%s: mlx5_pci_enable_device failed with error code: %d\n"
			, __func__, err);
		return PCI_ERS_RESULT_DISCONNECT;
	}
	pci_set_master(pdev);
	pci_set_powerstate(pdev->dev.bsddev, PCI_POWERSTATE_D0);
	pci_restore_state(pdev->dev.bsddev);
	pci_save_state(pdev->dev.bsddev);

	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
}

/* Wait for the device to show vital signs. For now we check
 * that we can read the device ID and that the health buffer
 * shows a nonzero value which is different from 0xffffffff.
 */
static void wait_vital(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_core_health *health = &dev->priv.health;
	const int niter = 100;
	u32 count;
	u16 did;
	int i;

	/* Wait for firmware to be ready after reset */
	msleep(1000);
	for (i = 0; i < niter; i++) {
		if (pci_read_config_word(pdev, 2, &did)) {
			dev_warn(&pdev->dev, "failed reading config word\n");
			break;
		}
		if (did == pdev->device) {
			dev_info(&pdev->dev, "device ID correctly read after %d iterations\n", i);
			break;
		}
		msleep(50);
	}
	if (i == niter)
		dev_warn(&pdev->dev, "%s-%d: could not read device ID\n", __func__, __LINE__);

	for (i = 0; i < niter; i++) {
		count = ioread32be(health->health_counter);
		if (count && count != 0xffffffff) {
			dev_info(&pdev->dev, "Counter value 0x%x after %d iterations\n", count, i);
			break;
		}
		msleep(50);
	}

	if (i == niter)
		dev_warn(&pdev->dev, "%s-%d: could not read the health counter\n", __func__, __LINE__);
}

static void mlx5_pci_resume(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;
	int err;

	dev_info(&pdev->dev, "%s was called\n", __func__);

	wait_vital(pdev);

	err = mlx5_load_one(dev, priv, false);
	if (err)
		dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n"
			, __func__, err);
	else
		dev_info(&pdev->dev, "%s: device recovered\n", __func__);
}

static const struct pci_error_handlers mlx5_err_handler = {
	.error_detected = mlx5_pci_err_detected,
	.slot_reset	= mlx5_pci_slot_reset,
	.resume		= mlx5_pci_resume
};

static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
{
	int err;

	if (!MLX5_CAP_GEN(dev, force_teardown)) {
		mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
		return -EOPNOTSUPP;
	}

	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
		return -EAGAIN;
	}

	/* The panic teardown firmware command will stop the PCI bus
	 * communication with the HCA, so the health poll is no longer
	 * needed.
	 */
	mlx5_drain_health_wq(dev);
	mlx5_stop_health_poll(dev, false);

	err = mlx5_cmd_force_teardown_hca(dev);
	if (err) {
		mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", err);
		return err;
	}

	mlx5_enter_error_state(dev, true);

	return 0;
}

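/*
 * On shutdown, try the firmware's fast teardown first and fall back
 * to a regular unload when it is unsupported or fails.
 */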
static void shutdown_one(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
	struct mlx5_priv *priv = &dev->priv;
	int err;

	set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state);
	err = mlx5_try_fast_unload(dev);
	if (err)
		mlx5_unload_one(dev, priv, false);
	mlx5_pci_disable_device(dev);
}

static const struct pci_device_id mlx5_core_pci_table[] = {
	{ PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */
	{ PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */
	{ PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */
	{ PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */
	{ PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */
	{ PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */
	{ PCI_VDEVICE(MELLANOX, 4119) }, /* ConnectX-5 */
	{ PCI_VDEVICE(MELLANOX, 4120) }, /* ConnectX-5 VF */
	{ PCI_VDEVICE(MELLANOX, 4121) },
	{ PCI_VDEVICE(MELLANOX, 4122) },
	{ PCI_VDEVICE(MELLANOX, 4123) },
	{ PCI_VDEVICE(MELLANOX, 4124) },
	{ PCI_VDEVICE(MELLANOX, 4125) },
	{ PCI_VDEVICE(MELLANOX, 4126) },
	{ PCI_VDEVICE(MELLANOX, 4127) },
	{ PCI_VDEVICE(MELLANOX, 4128) },
	{ PCI_VDEVICE(MELLANOX, 4129) },
	{ PCI_VDEVICE(MELLANOX, 4130) },
	{ PCI_VDEVICE(MELLANOX, 4131) },
	{ PCI_VDEVICE(MELLANOX, 4132) },
	{ PCI_VDEVICE(MELLANOX, 4133) },
	{ PCI_VDEVICE(MELLANOX, 4134) },
	{ PCI_VDEVICE(MELLANOX, 4135) },
	{ PCI_VDEVICE(MELLANOX, 4136) },
	{ PCI_VDEVICE(MELLANOX, 4137) },
	{ PCI_VDEVICE(MELLANOX, 4138) },
	{ PCI_VDEVICE(MELLANOX, 4139) },
	{ PCI_VDEVICE(MELLANOX, 4140) },
	{ PCI_VDEVICE(MELLANOX, 4141) },
	{ PCI_VDEVICE(MELLANOX, 4142) },
	{ PCI_VDEVICE(MELLANOX, 4143) },
	{ PCI_VDEVICE(MELLANOX, 4144) },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);

void mlx5_disable_device(struct mlx5_core_dev *dev)
{
	mlx5_pci_err_detected(dev->pdev, 0);
}

void mlx5_recover_device(struct mlx5_core_dev *dev)
{
	mlx5_pci_disable_device(dev);
	if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
		mlx5_pci_resume(dev->pdev);
}

struct pci_driver mlx5_core_driver = {
	.name           = DRIVER_NAME,
	.id_table       = mlx5_core_pci_table,
	.shutdown	= shutdown_one,
	.probe          = init_one,
	.remove         = remove_one,
	.err_handler	= &mlx5_err_handler
};

static int __init init(void)
{
	int err;

	err = pci_register_driver(&mlx5_core_driver);
	if (err)
		goto err_debug;

	err = mlx5_fwdump_init();
	if (err)
		goto err_fwdump;

	return 0;

err_fwdump:
	pci_unregister_driver(&mlx5_core_driver);

err_debug:
	return err;
}

static void __exit cleanup(void)
{
	mlx5_fwdump_fini();
	pci_unregister_driver(&mlx5_core_driver);
}

module_init(init);
module_exit(cleanup);