/*-
 * Copyright (c) 2013-2019, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c 361413 2020-05-23 11:59:36Z kib $
 */

#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/interrupt.h>
#include <linux/hardirq.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/cq.h>
#include <dev/mlx5/qp.h>
#include <dev/mlx5/srq.h>
#include <dev/mlx5/mpfs.h>
#include <dev/mlx5/vport.h>
#include <linux/delay.h>
#include <dev/mlx5/mlx5_ifc.h>
#include <dev/mlx5/mlx5_fpga/core.h>
#include <dev/mlx5/mlx5_lib/mlx5.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "fs_core.h"
#ifdef PCI_IOV
#include <sys/nv.h>
#include <dev/pci/pci_iov.h>
#include <sys/iov_schema.h>
#endif

static const char mlx5_version[] = "Mellanox Core driver "
	DRIVER_VERSION " (" DRIVER_RELDATE ")";
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5, linuxkpi, 1, 1, 1);
MODULE_DEPEND(mlx5, mlxfw, 1, 1, 1);
MODULE_DEPEND(mlx5, firmware, 1, 1, 1);
MODULE_VERSION(mlx5, 1);

SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "mlx5 hardware controls");

int mlx5_core_debug_mask;
SYSCTL_INT(_hw_mlx5, OID_AUTO, debug_mask, CTLFLAG_RWTUN,
    &mlx5_core_debug_mask, 0,
    "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

#define MLX5_DEFAULT_PROF	2
static int mlx5_prof_sel = MLX5_DEFAULT_PROF;
SYSCTL_INT(_hw_mlx5, OID_AUTO, prof_sel, CTLFLAG_RWTUN,
    &mlx5_prof_sel, 0,
    "profile selector. Valid range 0 - 3");

static int mlx5_fast_unload_enabled = 1;
SYSCTL_INT(_hw_mlx5, OID_AUTO, fast_unload_enabled, CTLFLAG_RWTUN,
    &mlx5_fast_unload_enabled, 0,
    "Set to enable fast unload. Clear to disable.");

#define NUMA_NO_NODE       -1

static LIST_HEAD(intf_list);
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(intf_mutex);

struct mlx5_device_context {
	struct list_head	list;
	struct mlx5_interface  *intf;
	void		       *context;
};

enum {
	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

static struct mlx5_profile profiles[] = {
	[0] = {
		.mask           = 0,
	},
	[1] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 12,
	},
	[2] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE |
				  MLX5_PROF_MASK_MR_CACHE,
		.log_max_qp	= 17,
		.mr_cache[0]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[1]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[2]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[3]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[4]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[5]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[6]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[7]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[8]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[9]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[10]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[11]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[12]	= {
			.size	= 64,
			.limit	= 32
		},
		.mr_cache[13]	= {
			.size	= 32,
			.limit	= 16
		},
		.mr_cache[14]	= {
			.size	= 16,
			.limit	= 8
		},
	},
	[3] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 17,
	},
};

#ifdef PCI_IOV
static const char iov_mac_addr_name[] = "mac-addr";
#endif

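/*
 * Configure DMA addressing for the device: prefer 64-bit streaming and
 * coherent DMA masks, fall back to 32-bit masks if the wider setting is
 * rejected, and cap DMA segments at 2GB.
 */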
static int set_dma_caps(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err;

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		mlx5_core_warn(dev, "couldn't set 64-bit PCI DMA mask\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			mlx5_core_err(dev, "Can't set PCI DMA mask, aborting\n");
			return err;
		}
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		mlx5_core_warn(dev, "couldn't set 64-bit consistent PCI DMA mask\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			mlx5_core_err(dev, "Can't set consistent PCI DMA mask, aborting\n");
			return err;
		}
	}

	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
	return err;
}

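/*
 * Read the PCI power value (in Watts) and the power status of the slot
 * through the MPEIN access register.
 */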
int mlx5_pci_read_power_status(struct mlx5_core_dev *dev,
			       u16 *p_power, u8 *p_status)
{
	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {};
	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {};
	int err;

	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
	    MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN, 0, 0);

	*p_status = MLX5_GET(mpein_reg, out, pwr_status);
	*p_power = MLX5_GET(mpein_reg, out, pci_power);
	return err;
}

static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int err = 0;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
		err = pci_enable_device(pdev);
		if (!err)
			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);

	return err;
}

static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
		pci_disable_device(pdev);
		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);
}

static int request_bar(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err = 0;

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		mlx5_core_err(dev, "Missing registers BAR, aborting\n");
		return -ENODEV;
	}

	err = pci_request_regions(pdev, DRIVER_NAME);
	if (err)
		mlx5_core_err(dev, "Couldn't get PCI resources, aborting\n");

	return err;
}

static void release_bar(struct pci_dev *pdev)
{
	pci_release_regions(pdev);
}

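/*
 * Allocate and enable MSI-X vectors: MLX5_EQ_VEC_COMP_BASE control vectors
 * plus either the per-device msix_eqvec tunable, if set, or one completion
 * vector per port per online CPU, clamped to the number of EQs supported
 * by the device and to the firmware limit of 256 vectors.
 */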
static int mlx5_enable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_eq_table *table = &priv->eq_table;
	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
	int limit = dev->msix_eqvec;
	int nvec = MLX5_EQ_VEC_COMP_BASE;
	int i;

	if (limit > 0)
		nvec += limit;
	else
		nvec += MLX5_CAP_GEN(dev, num_ports) * num_online_cpus();

	if (nvec > num_eqs)
		nvec = num_eqs;
	if (nvec > 256)
		nvec = 256;	/* limit of firmware API */
	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
		return -ENOMEM;

	priv->msix_arr = kzalloc(nvec * sizeof(*priv->msix_arr), GFP_KERNEL);
	if (priv->msix_arr == NULL)
		return -ENOMEM;

	for (i = 0; i < nvec; i++)
		priv->msix_arr[i].entry = i;

	nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
				     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
	if (nvec < 0)
		return nvec;

	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
	return 0;
}

static void mlx5_disable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;

	pci_disable_msix(dev->pdev);
	kfree(priv->msix_arr);
}

struct mlx5_reg_host_endianess {
	u8	he;
	u8      rsvd[15];
};


#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

enum {
	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
				MLX5_DEV_CAP_FLAG_DCT |
				MLX5_DEV_CAP_FLAG_DRAIN_SIGERR,
};

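/*
 * Translate a P_Key table size in entries into the firmware encoding
 * (log2 of the size relative to the 128-entry minimum).
 */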
static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
	switch (size) {
	case 128:
		return 0;
	case 256:
		return 1;
	case 512:
		return 2;
	case 1024:
		return 3;
	case 2048:
		return 4;
	case 4096:
		return 5;
	default:
		mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
		return 0;
	}
}

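/*
 * Query one HCA capability group in the requested mode (current or maximum
 * values) and cache the result in dev->hca_caps_cur[] or hca_caps_max[].
 */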
static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
				   enum mlx5_cap_type cap_type,
				   enum mlx5_cap_mode cap_mode)
{
	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *out, *hca_caps;
	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
	int err;

	memset(in, 0, sizeof(in));
	out = kzalloc(out_sz, GFP_KERNEL);
	if (out == NULL)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
	if (err) {
		mlx5_core_warn(dev,
			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
			       cap_type, cap_mode, err);
		goto query_ex;
	}

	hca_caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	switch (cap_mode) {
	case HCA_CAP_OPMOD_GET_MAX:
		memcpy(dev->hca_caps_max[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	case HCA_CAP_OPMOD_GET_CUR:
		memcpy(dev->hca_caps_cur[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	default:
		mlx5_core_warn(dev,
			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
			       cap_type, cap_mode);
		err = -EINVAL;
		break;
	}
query_ex:
	kfree(out);
	return err;
}

int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
{
	int ret;

	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
	if (ret)
		return ret;

	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}

static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
{
	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};

	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);

	return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
}

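/*
 * Query the current general HCA capabilities and write back the subset the
 * driver changes: a 128-entry P_Key table, the profile's log_max_qp, no
 * command-interface checksums, drain_sigerr enabled and the UAR page size.
 */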
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
	void *set_ctx = NULL;
	struct mlx5_profile *prof = dev->profile;
	int err = -ENOMEM;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *set_hca_cap;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);
	if (set_ctx == NULL)
		return err;

	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
	if (err)
		goto query_ex;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
				   capability);
	memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
	       MLX5_ST_SZ_BYTES(cmd_hca_cap));

	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
		      128);
	/* we limit the size of the pkey table to 128 entries for now */
	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
		 to_fw_pkey_sz(dev, 128));

	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
			 prof->log_max_qp);

	/* disable cmdif checksum */
	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

	/* enable drain sigerr */
	MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1);

	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

	err = set_caps(dev, set_ctx, set_sz);

query_ex:
	kfree(set_ctx);
	return err;
}

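/*
 * If the device exposes atomic capabilities and supports host-endianness
 * mode for 8-byte atomic requests, switch the requestor from the default
 * big-endian mode to host endianness.
 */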
static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
{
	void *set_ctx;
	void *set_hca_cap;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	int req_endianness;
	int err;

	if (MLX5_CAP_GEN(dev, atomic)) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
		if (err)
			return err;
	} else {
		return 0;
	}

	req_endianness =
		MLX5_CAP_ATOMIC(dev,
				supported_atomic_req_8B_endianess_mode_1);

	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
		return 0;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);
	if (!set_ctx)
		return -ENOMEM;

	MLX5_SET(set_hca_cap_in, set_ctx, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_ATOMIC << 1);
	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);

	/* Set requestor to host endianness */
	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);

	err = set_caps(dev, set_ctx, set_sz);

	kfree(set_ctx);
	return err;
}

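/*
 * Report the host's endianness to firmware through the HOST_ENDIANNESS
 * access register; Ethernet-only devices without RoCE skip this step.
 */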
static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
	struct mlx5_reg_host_endianess he_in;
	struct mlx5_reg_host_endianess he_out;
	int err;

	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
	    !MLX5_CAP_GEN(dev, roce))
		return 0;

	memset(&he_in, 0, sizeof(he_in));
	he_in.he = MLX5_SET_HOST_ENDIANNESS;
	err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
					&he_out, sizeof(he_out),
					MLX5_REG_HOST_ENDIANNESS, 0, 1);
	return err;
}

static int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};

	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
	MLX5_SET(enable_hca_in, in, function_id, func_id);
	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
}

static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};

	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

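/*
 * Negotiate the ISSI (interface step sequence ID) with firmware: query the
 * supported ISSI mask, prefer ISSI 1 when offered, fall back to ISSI 0, and
 * treat a BAD_OP error as an older device that only supports ISSI 0.
 */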
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
	u32 sup_issi;
	int err;

	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);

	err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), query_out, sizeof(query_out));
	if (err) {
		u32 syndrome;
		u8 status;

		mlx5_cmd_mbox_status(query_out, &status, &syndrome);
		if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
			mlx5_core_dbg(dev, "Only ISSI 0 is supported\n");
			return 0;
		}

		mlx5_core_err(dev, "failed to query ISSI\n");
		return err;
	}

	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);

	if (sup_issi & (1 << 1)) {
		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0};
		u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};

		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
		MLX5_SET(set_issi_in, set_in, current_issi, 1);

		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out));
		if (err) {
			mlx5_core_err(dev, "failed to set ISSI=1 err(%d)\n", err);
			return err;
		}

		dev->issi = 1;

		return 0;
	} else if (sup_issi & (1 << 0)) {
		return 0;
	}

	return -ENOTSUPP;
}


int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq;
	int err = -ENOENT;

	spin_lock(&table->lock);
	list_for_each_entry(eq, &table->comp_eqs_list, list) {
		if (eq->index == vector) {
			*eqn = eq->eqn;
			*irqn = eq->irqn;
			err = 0;
			break;
		}
	}
	spin_unlock(&table->lock);

	return err;
}
EXPORT_SYMBOL(mlx5_vector2eqn);

static void free_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq, *n;

	spin_lock(&table->lock);
	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
		list_del(&eq->list);
		spin_unlock(&table->lock);
		if (mlx5_destroy_unmap_eq(dev, eq))
			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
				       eq->eqn);
		kfree(eq);
		spin_lock(&table->lock);
	}
	spin_unlock(&table->lock);
}

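/*
 * Create one completion EQ per enabled MSI-X completion vector and link it
 * on the per-device list; on failure everything created so far is torn
 * down again.
 */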
static int alloc_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq;
	int ncomp_vec;
	int nent;
	int err;
	int i;

	INIT_LIST_HEAD(&table->comp_eqs_list);
	ncomp_vec = table->num_comp_vectors;
	nent = MLX5_COMP_EQ_SIZE;
	for (i = 0; i < ncomp_vec; i++) {
		eq = kzalloc(sizeof(*eq), GFP_KERNEL);
		if (eq == NULL) {
			err = -ENOMEM;
			goto clean;
		}

		err = mlx5_create_map_eq(dev, eq,
					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
					 &dev->priv.uuari.uars[0]);
		if (err) {
			kfree(eq);
			goto clean;
		}
		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
		eq->index = i;
		spin_lock(&table->lock);
		list_add_tail(&eq->list, &table->comp_eqs_list);
		spin_unlock(&table->lock);
	}

	return 0;

clean:
	free_comp_eqs(dev);
	return err;
}

static int map_bf_area(struct mlx5_core_dev *dev)
{
	resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
	resource_size_t bf_len = pci_resource_len(dev->pdev, 0);

	dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);

	return dev->priv.bf_mapping ? 0 : -ENOMEM;
}

static void unmap_bf_area(struct mlx5_core_dev *dev)
{
	if (dev->priv.bf_mapping)
		io_mapping_free(dev->priv.bf_mapping);
}

static inline int fw_initializing(struct mlx5_core_dev *dev)
{
	return ioread32be(&dev->iseg->initializing) >> 31;
}

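/*
 * Poll the initialization segment until firmware clears the initializing
 * bit; fail with -EBUSY after max_wait_mili milliseconds and log a progress
 * warning every warn_time_mili milliseconds (0 disables the warning).
 */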
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
			u32 warn_time_mili)
{
	int warn = jiffies + msecs_to_jiffies(warn_time_mili);
	int end = jiffies + msecs_to_jiffies(max_wait_mili);
	int err = 0;

	MPASS(max_wait_mili > warn_time_mili);

	while (fw_initializing(dev) == 1) {
		if (time_after(jiffies, end)) {
			err = -EBUSY;
			break;
		}
		if (warn_time_mili && time_after(jiffies, warn)) {
			mlx5_core_warn(dev,
			    "Waiting for FW initialization, timeout abort in %u s\n",
			    (unsigned int)(jiffies_to_msecs(end - warn) / 1000));
			warn = jiffies + msecs_to_jiffies(warn_time_mili);
		}
		msleep(FW_INIT_WAIT_MS);
	}

	if (err != 0)
		mlx5_core_dbg(dev, "Full initializing bit dword = 0x%x\n",
		    ioread32be(&dev->iseg->initializing));

	return err;
}

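/*
 * Instantiate one registered mlx5 interface (for example the Ethernet or
 * Infiniband driver) on a core device, running its add() callback with the
 * default vnet, and keep the returned context on the device's ctx_list.
 */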
static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
	if (!dev_ctx)
		return;

	dev_ctx->intf    = intf;
	CURVNET_SET_QUIET(vnet0);
	dev_ctx->context = intf->add(dev);
	CURVNET_RESTORE();

	if (dev_ctx->context) {
		spin_lock_irq(&priv->ctx_lock);
		list_add_tail(&dev_ctx->list, &priv->ctx_list);
		spin_unlock_irq(&priv->ctx_lock);
	} else {
		kfree(dev_ctx);
	}
}

static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf == intf) {
			spin_lock_irq(&priv->ctx_lock);
			list_del(&dev_ctx->list);
			spin_unlock_irq(&priv->ctx_lock);

			intf->remove(dev, dev_ctx->context);
			kfree(dev_ctx);
			return;
		}
}

int
mlx5_register_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_add_tail(&priv->dev_list, &dev_list);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}

void
mlx5_unregister_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_remove_device(intf, priv);
	list_del(&priv->dev_list);
	mutex_unlock(&intf_mutex);
}

int mlx5_register_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	if (!intf->add || !intf->remove)
		return -EINVAL;

	mutex_lock(&intf_mutex);
	list_add_tail(&intf->list, &intf_list);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}
EXPORT_SYMBOL(mlx5_register_interface);

void mlx5_unregister_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	mutex_lock(&intf_mutex);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_remove_device(intf, priv);
	list_del(&intf->list);
	mutex_unlock(&intf_mutex);
}
EXPORT_SYMBOL(mlx5_unregister_interface);

void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
	struct mlx5_priv *priv = &mdev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;
	void *result = NULL;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
		if ((dev_ctx->intf->protocol == protocol) &&
		    dev_ctx->intf->get_dev) {
			result = dev_ctx->intf->get_dev(dev_ctx->context);
			break;
		}

	spin_unlock_irqrestore(&priv->ctx_lock, flags);

	return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);
static int mlx5_auto_fw_update;
SYSCTL_INT(_hw_mlx5, OID_AUTO, auto_fw_update, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &mlx5_auto_fw_update, 0,
    "Allow automatic firmware update on driver start");
static int
mlx5_firmware_update(struct mlx5_core_dev *dev)
{
	const struct firmware *fw;
	int err;

	TUNABLE_INT_FETCH("hw.mlx5.auto_fw_update", &mlx5_auto_fw_update);
	if (!mlx5_auto_fw_update)
		return (0);
	fw = firmware_get("mlx5fw_mfa");
	if (fw) {
		err = mlx5_firmware_flash(dev, fw);
		firmware_put(fw, FIRMWARE_UNLOAD);
	} else
		return (-ENOENT);

	return err;
}

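/*
 * Low-level PCI bring-up: enable the device, claim BAR 0, enable bus
 * mastering, configure DMA and map the initialization segment.
 */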
static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	struct pci_dev *pdev = dev->pdev;
	int err;

	pci_set_drvdata(dev->pdev, dev);
	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;

	mutex_init(&priv->pgdir_mutex);
	INIT_LIST_HEAD(&priv->pgdir_list);
	spin_lock_init(&priv->mkey_lock);

	priv->numa_node = NUMA_NO_NODE;

	err = mlx5_pci_enable_device(dev);
	if (err) {
		mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
		goto err_dbg;
	}

	err = request_bar(pdev);
	if (err) {
		mlx5_core_err(dev, "error requesting BARs, aborting\n");
		goto err_disable;
	}

	pci_set_master(pdev);

	err = set_dma_caps(pdev);
	if (err) {
		mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
		goto err_clr_master;
	}

	dev->iseg_base = pci_resource_start(dev->pdev, 0);
	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
	if (!dev->iseg) {
		err = -ENOMEM;
		mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
		goto err_clr_master;
	}

	return 0;

err_clr_master:
	release_bar(dev->pdev);
err_disable:
	mlx5_pci_disable_device(dev);
err_dbg:
	return err;
}

static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
#ifdef PCI_IOV
	if (MLX5_CAP_GEN(dev, eswitch_flow_table))
		pci_iov_detach(dev->pdev->dev.bsddev);
#endif
	iounmap(dev->iseg);
	release_bar(dev->pdev);
	mlx5_pci_disable_device(dev);
}

static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	int err;

	err = mlx5_vsc_find_cap(dev);
	if (err)
		mlx5_core_err(dev, "Unable to find vendor specific capabilities\n");

	err = mlx5_query_hca_caps(dev);
	if (err) {
		mlx5_core_err(dev, "query hca failed\n");
		goto out;
	}

	err = mlx5_query_board_id(dev);
	if (err) {
		mlx5_core_err(dev, "query board id failed\n");
		goto out;
	}

	err = mlx5_eq_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize eq\n");
		goto out;
	}

	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);

	err = mlx5_init_cq_table(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize cq table\n");
		goto err_eq_cleanup;
	}

	mlx5_init_qp_table(dev);
	mlx5_init_srq_table(dev);
	mlx5_init_mr_table(dev);

	mlx5_init_reserved_gids(dev);
	mlx5_fpga_init(dev);

	return 0;

err_eq_cleanup:
	mlx5_eq_cleanup(dev);

out:
	return err;
}

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
	mlx5_fpga_cleanup(dev);
	mlx5_cleanup_reserved_gids(dev);
	mlx5_cleanup_mr_table(dev);
	mlx5_cleanup_srq_table(dev);
	mlx5_cleanup_qp_table(dev);
	mlx5_cleanup_cq_table(dev);
	mlx5_eq_cleanup(dev);
}

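/*
 * Bring the device up: wait for firmware, initialize the command interface,
 * enable the HCA, negotiate ISSI, hand out boot and init pages, program HCA
 * capabilities, then start health polling, MSI-X, UARs, EQs, flow steering,
 * MPFS and the FPGA before registering the device with its interfaces.
 * When "boot" is set the software object tables are also initialized here.
 */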
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			 bool boot)
{
	int err;

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "interface is up, NOP\n");
		goto out;
	}

	mlx5_core_dbg(dev, "firmware version: %d.%d.%d\n",
	    fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));

	/*
	 * On load, remove any previous indication of internal error;
	 * the device is up.
	 */
	dev->state = MLX5_DEVICE_STATE_UP;

	/* Wait for firmware to accept initialization segment configurations. */
	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI,
	    FW_INIT_WARN_MESSAGE_INTERVAL);
	if (err) {
		dev_err(&dev->pdev->dev,
		    "Firmware over %d MS in pre-initializing state, aborting\n",
		    FW_PRE_INIT_TIMEOUT_MILI);
		goto out_err;
	}

	err = mlx5_cmd_init(dev);
	if (err) {
		mlx5_core_err(dev,
		    "Failed initializing command interface, aborting\n");
		goto out_err;
	}

	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
	if (err) {
		mlx5_core_err(dev,
		    "Firmware over %d MS in initializing state, aborting\n",
		    FW_INIT_TIMEOUT_MILI);
		goto err_cmd_cleanup;
	}

	err = mlx5_core_enable_hca(dev, 0);
	if (err) {
		mlx5_core_err(dev, "enable hca failed\n");
		goto err_cmd_cleanup;
	}

	err = mlx5_core_set_issi(dev);
	if (err) {
		mlx5_core_err(dev, "failed to set issi\n");
		goto err_disable_hca;
	}

	err = mlx5_pagealloc_start(dev);
	if (err) {
		mlx5_core_err(dev, "mlx5_pagealloc_start failed\n");
		goto err_disable_hca;
	}

	err = mlx5_satisfy_startup_pages(dev, 1);
	if (err) {
		mlx5_core_err(dev, "failed to allocate boot pages\n");
		goto err_pagealloc_stop;
	}

	err = set_hca_ctrl(dev);
	if (err) {
		mlx5_core_err(dev, "set_hca_ctrl failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap(dev);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap_atomic(dev);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_satisfy_startup_pages(dev, 0);
	if (err) {
		mlx5_core_err(dev, "failed to allocate init pages\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_cmd_init_hca(dev);
	if (err) {
		mlx5_core_err(dev, "init hca failed\n");
		goto reclaim_boot_pages;
	}

	mlx5_start_health_poll(dev);

	if (boot && mlx5_init_once(dev, priv)) {
		mlx5_core_err(dev, "sw objs init failed\n");
		goto err_stop_poll;
	}

	err = mlx5_enable_msix(dev);
	if (err) {
		mlx5_core_err(dev, "enable msix failed\n");
		goto err_cleanup_once;
	}

	err = mlx5_alloc_uuars(dev, &priv->uuari);
	if (err) {
		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
		goto err_disable_msix;
	}

	err = mlx5_start_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to start pages and async EQs\n");
		goto err_free_uar;
	}

	err = alloc_comp_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc completion EQs\n");
		goto err_stop_eqs;
	}

	if (map_bf_area(dev))
		mlx5_core_err(dev, "Failed to map blue flame area\n");

	err = mlx5_init_fs(dev);
	if (err) {
		mlx5_core_err(dev, "flow steering init %d\n", err);
		goto err_free_comp_eqs;
	}

	err = mlx5_mpfs_init(dev);
	if (err) {
		mlx5_core_err(dev, "mpfs init failed %d\n", err);
		goto err_fs;
	}

	err = mlx5_fpga_device_start(dev);
	if (err) {
		mlx5_core_err(dev, "fpga device start failed %d\n", err);
		goto err_mpfs;
	}

	err = mlx5_register_device(dev);
	if (err) {
		mlx5_core_err(dev, "mlx5_register_device failed %d\n", err);
		goto err_fpga;
	}

	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

out:
	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_fpga:
	mlx5_fpga_device_stop(dev);

err_mpfs:
	mlx5_mpfs_destroy(dev);

err_fs:
	mlx5_cleanup_fs(dev);

err_free_comp_eqs:
	free_comp_eqs(dev);
	unmap_bf_area(dev);

err_stop_eqs:
	mlx5_stop_eqs(dev);

err_free_uar:
	mlx5_free_uuars(dev, &priv->uuari);

err_disable_msix:
	mlx5_disable_msix(dev);

err_cleanup_once:
	if (boot)
		mlx5_cleanup_once(dev);

err_stop_poll:
	mlx5_stop_health_poll(dev, boot);
	if (mlx5_cmd_teardown_hca(dev)) {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
		goto out_err;
	}

reclaim_boot_pages:
	mlx5_reclaim_startup_pages(dev);

err_pagealloc_stop:
	mlx5_pagealloc_stop(dev);

err_disable_hca:
	mlx5_core_disable_hca(dev);

err_cmd_cleanup:
	mlx5_cmd_cleanup(dev);

out_err:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);

	return err;
}

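/*
 * Tear the device down in roughly the reverse order of mlx5_load_one();
 * "cleanup" additionally drains health recovery work and destroys the
 * software object tables created by mlx5_init_once().
 */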
static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			   bool cleanup)
{
	int err = 0;

	if (cleanup)
		mlx5_drain_health_recovery(dev);

	mutex_lock(&dev->intf_state_mutex);
	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__);
		if (cleanup)
			mlx5_cleanup_once(dev);
		goto out;
	}

	mlx5_unregister_device(dev);

	mlx5_eswitch_cleanup(dev->priv.eswitch);
	mlx5_fpga_device_stop(dev);
	mlx5_mpfs_destroy(dev);
	mlx5_cleanup_fs(dev);
	unmap_bf_area(dev);
	mlx5_wait_for_reclaim_vfs_pages(dev);
	free_comp_eqs(dev);
	mlx5_stop_eqs(dev);
	mlx5_free_uuars(dev, &priv->uuari);
	mlx5_disable_msix(dev);
	if (cleanup)
		mlx5_cleanup_once(dev);
	mlx5_stop_health_poll(dev, cleanup);
	err = mlx5_cmd_teardown_hca(dev);
	if (err) {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
		goto out;
	}
	mlx5_pagealloc_stop(dev);
	mlx5_reclaim_startup_pages(dev);
	mlx5_core_disable_hca(dev);
	mlx5_cmd_cleanup(dev);

out:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
		     unsigned long param)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf->event)
			dev_ctx->intf->event(dev, dev_ctx->context, event, param);

	spin_unlock_irqrestore(&priv->ctx_lock, flags);
}

struct mlx5_core_event_handler {
	void (*event)(struct mlx5_core_dev *dev,
		      enum mlx5_dev_event event,
		      void *data);
};

#define	MLX5_STATS_DESC(a, b, c, d, e, ...) d, e,

#define	MLX5_PORT_MODULE_ERROR_STATS(m)				\
m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \
m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \
m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck(I2C or data shorted)") \
m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \
m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \
m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \
m(+1, u64, high_temp, "high_temp", "Module High Temperature") \
m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted") \
m(+1, u64, pmd_type_not_enabled, "pmd_type_not_enabled", "PMD type is not enabled")

static const char *mlx5_pme_err_desc[] = {
	MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC)
};

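/*
 * PCI probe/attach entry point: allocate the core device, pick the
 * configuration profile and build the per-device sysctl tree (MSI-X tuning,
 * power status, port module event counters and raw capability dumps).
 */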
static int init_one(struct pci_dev *pdev,
		    const struct pci_device_id *id)
{
	struct mlx5_core_dev *dev;
	struct mlx5_priv *priv;
	device_t bsddev = pdev->dev.bsddev;
#ifdef PCI_IOV
	nvlist_t *pf_schema, *vf_schema;
	int num_vfs, sriov_pos;
#endif
	int i, err;
	struct sysctl_oid *pme_sysctl_node;
	struct sysctl_oid *pme_err_sysctl_node;
	struct sysctl_oid *cap_sysctl_node;
	struct sysctl_oid *current_cap_sysctl_node;
	struct sysctl_oid *max_cap_sysctl_node;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	priv = &dev->priv;
	if (id)
		priv->pci_dev_data = id->driver_data;

	if (mlx5_prof_sel < 0 || mlx5_prof_sel >= ARRAY_SIZE(profiles)) {
		device_printf(bsddev,
		    "WARN: selected profile out of range, selecting default (%d)\n",
		    MLX5_DEFAULT_PROF);
		mlx5_prof_sel = MLX5_DEFAULT_PROF;
	}
	dev->profile = &profiles[mlx5_prof_sel];
	dev->pdev = pdev;
	dev->event = mlx5_core_event;

	/* Set desc */
	device_set_desc(bsddev, mlx5_version);

	sysctl_ctx_init(&dev->sysctl_ctx);
	SYSCTL_ADD_INT(&dev->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
	    OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0,
	    "Maximum number of MSIX event queue vectors, if set");
	SYSCTL_ADD_INT(&dev->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
	    OID_AUTO, "power_status", CTLFLAG_RD, &dev->pwr_status, 0,
	    "0:Invalid 1:Sufficient 2:Insufficient");
	SYSCTL_ADD_INT(&dev->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
	    OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
	    "Current power value in Watts");

	pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
	    OID_AUTO, "pme_stats", CTLFLAG_RD, NULL,
	    "Port module event statistics");
	if (pme_sysctl_node == NULL) {
		err = -ENOMEM;
		goto clean_sysctl_ctx;
	}
	pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
	    SYSCTL_CHILDREN(pme_sysctl_node),
	    OID_AUTO, "errors", CTLFLAG_RD, NULL,
	    "Port module event error statistics");
	if (pme_err_sysctl_node == NULL) {
		err = -ENOMEM;
		goto clean_sysctl_ctx;
	}
	SYSCTL_ADD_U64(&dev->sysctl_ctx,
	    SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
	    "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE,
	    &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED],
1363353206Shselasky	    0, "Number of time module plugged");
1364353206Shselasky	SYSCTL_ADD_U64(&dev->sysctl_ctx,
1365353206Shselasky	    SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
1366353206Shselasky	    "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1367353206Shselasky	    &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED],
1368353206Shselasky	    0, "Number of time module unplugged");
1369353206Shselasky	for (i = 0 ; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) {
1370353206Shselasky		SYSCTL_ADD_U64(&dev->sysctl_ctx,
1371353206Shselasky		    SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO,
1372353206Shselasky		    mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE,
1373353206Shselasky		    &dev->priv.pme_stats.error_counters[i],
1374353206Shselasky		    0, mlx5_pme_err_desc[2 * i + 1]);
1375353206Shselasky	}
1376353206Shselasky
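	/*
	 * Export the raw HCA capability bitstrings under a "caps" sysctl
	 * node, split into "current" (values in effect) and "max" (values
	 * the device can support) sub-trees, plus the PCAM/MCAM/QCAM and
	 * FPGA capability words.  The leaves are opaque byte arrays that
	 * can be dumped with sysctl(8), e.g. (path assumes the driver
	 * attaches as mlx5_core, unit 0):
	 *
	 *	sysctl -x dev.mlx5_core.0.caps.current.general
	 */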
1377359529Skib	cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1378359529Skib	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1379359529Skib	    OID_AUTO, "caps", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1380359529Skib	    "hardware capabilities raw bitstrings");
1381359529Skib	if (cap_sysctl_node == NULL) {
1382359529Skib		err = -ENOMEM;
1383359529Skib		goto clean_sysctl_ctx;
1384359529Skib	}
1385359529Skib	current_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1386359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1387359529Skib	    OID_AUTO, "current", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1388359529Skib	    "");
1389359529Skib	if (current_cap_sysctl_node == NULL) {
1390359529Skib		err = -ENOMEM;
1391359529Skib		goto clean_sysctl_ctx;
1392359529Skib	}
1393359529Skib	max_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1394359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1395359529Skib	    OID_AUTO, "max", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1396359529Skib	    "");
1397359529Skib	if (max_cap_sysctl_node == NULL) {
1398359529Skib		err = -ENOMEM;
1399359529Skib		goto clean_sysctl_ctx;
1400359529Skib	}
1401359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1402359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1403359529Skib	    OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1404359529Skib	    &dev->hca_caps_cur[MLX5_CAP_GENERAL],
1405359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1406359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1407359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1408359529Skib	    OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1409359529Skib	    &dev->hca_caps_max[MLX5_CAP_GENERAL],
1410359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1411359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1412359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1413359529Skib	    OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1414359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS],
1415359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1416359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1417359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1418359529Skib	    OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1419359529Skib	    &dev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS],
1420359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1421359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1422359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1423359529Skib	    OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1424359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ODP],
1425359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1426359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1427359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1428359529Skib	    OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1429359529Skib	    &dev->hca_caps_max[MLX5_CAP_ODP],
1430359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1431359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1432359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1433359529Skib	    OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1434359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ATOMIC],
1435359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1436359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1437359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1438359529Skib	    OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1439359529Skib	    &dev->hca_caps_max[MLX5_CAP_ATOMIC],
1440359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1441359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1442359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1443359529Skib	    OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1444359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ROCE],
1445359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1446359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1447359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1448359529Skib	    OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1449359529Skib	    &dev->hca_caps_max[MLX5_CAP_ROCE],
1450359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1451359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1452359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1453359529Skib	    OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1454359529Skib	    &dev->hca_caps_cur[MLX5_CAP_IPOIB_OFFLOADS],
1455359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1456359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1457359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1458359529Skib	    OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1459359529Skib	    &dev->hca_caps_max[MLX5_CAP_IPOIB_OFFLOADS],
1460359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1461359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1462359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1463359529Skib	    OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1464359529Skib	    &dev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS],
1465359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1466359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1467359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1468359529Skib	    OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1469359529Skib	    &dev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS],
1470359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1471359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1472359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1473359529Skib	    OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1474359529Skib	    &dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE],
1475359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1476359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1477359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1478359529Skib	    OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1479359529Skib	    &dev->hca_caps_max[MLX5_CAP_FLOW_TABLE],
1480359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1481359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1482359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1483359529Skib	    OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1484359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE],
1485359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1486359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1487359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1488359529Skib	    OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1489359529Skib	    &dev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE],
1490359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1491359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1492359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1493359529Skib	    OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1494359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ESWITCH],
1495359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1496359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1497359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1498359529Skib	    OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1499359529Skib	    &dev->hca_caps_max[MLX5_CAP_ESWITCH],
1500359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1501359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1502359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1503359529Skib	    OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1504359529Skib	    &dev->hca_caps_cur[MLX5_CAP_SNAPSHOT],
1505359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1506359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1507359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1508359529Skib	    OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1509359529Skib	    &dev->hca_caps_max[MLX5_CAP_SNAPSHOT],
1510359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1511359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1512359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1513359529Skib	    OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1514359529Skib	    &dev->hca_caps_cur[MLX5_CAP_VECTOR_CALC],
1515359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1516359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1517359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1518359529Skib	    OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1519359529Skib	    &dev->hca_caps_max[MLX5_CAP_VECTOR_CALC],
1520359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1521359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1522359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1523359529Skib	    OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1524359529Skib	    &dev->hca_caps_cur[MLX5_CAP_QOS],
1525359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1526359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1527359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1528359529Skib	    OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1529359529Skib	    &dev->hca_caps_max[MLX5_CAP_QOS],
1530359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1531359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1532359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1533359529Skib	    OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1534359529Skib	    &dev->hca_caps_cur[MLX5_CAP_DEBUG],
1535359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1536359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1537359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1538359529Skib	    OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1539359529Skib	    &dev->hca_caps_max[MLX5_CAP_DEBUG],
1540359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1541359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1542359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1543359529Skib	    OID_AUTO, "pcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1544359529Skib	    &dev->caps.pcam, sizeof(dev->caps.pcam), "IU", "");
1545359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1546359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1547359529Skib	    OID_AUTO, "mcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1548359529Skib	    &dev->caps.mcam, sizeof(dev->caps.mcam), "IU", "");
1549359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1550359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1551359529Skib	    OID_AUTO, "qcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1552359529Skib	    &dev->caps.qcam, sizeof(dev->caps.qcam), "IU", "");
1553359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1554359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1555359529Skib	    OID_AUTO, "fpga", CTLFLAG_RD | CTLFLAG_MPSAFE,
1556359529Skib	    &dev->caps.fpga, sizeof(dev->caps.fpga), "IU", "");
1557353206Shselasky
1558290650Shselasky	INIT_LIST_HEAD(&priv->ctx_list);
1559290650Shselasky	spin_lock_init(&priv->ctx_lock);
1560341930Shselasky	mutex_init(&dev->pci_status_mutex);
1561341930Shselasky	mutex_init(&dev->intf_state_mutex);
1562347880Shselasky	mtx_init(&dev->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW);
1563331580Shselasky	err = mlx5_pci_init(dev, priv);
1564290650Shselasky	if (err) {
1565353224Shselasky		mlx5_core_err(dev, "mlx5_pci_init failed %d\n", err);
1566331580Shselasky		goto clean_dev;
1567290650Shselasky	}
1568290650Shselasky
1569341930Shselasky	err = mlx5_health_init(dev);
1570341930Shselasky	if (err) {
1571353224Shselasky		mlx5_core_err(dev, "mlx5_health_init failed %d\n", err);
1572341930Shselasky		goto close_pci;
1573341930Shselasky	}
1574331580Shselasky
1575331810Shselasky	mlx5_pagealloc_init(dev);
1576331810Shselasky
1577331810Shselasky	err = mlx5_load_one(dev, priv, true);
1578290650Shselasky	if (err) {
1579353224Shselasky		mlx5_core_err(dev, "mlx5_load_one failed %d\n", err);
1580331580Shselasky		goto clean_health;
1581290650Shselasky	}
1582290650Shselasky
1583331914Shselasky	mlx5_fwdump_prep(dev);
1584331914Shselasky
1585347847Shselasky	mlx5_firmware_update(dev);
1586347847Shselasky
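	/*
	 * With PCI_IOV, size the e-switch for the PF plus the number of
	 * VFs advertised by the SR-IOV capability (TotalVFs) and register
	 * the PF/VF configuration schema so that a unicast MAC can be
	 * handed to each VF, e.g. through iovctl(8) (hypothetical
	 * iovctl.conf fragment, assuming iov_mac_addr_name is the usual
	 * "mac-addr" key):
	 *
	 *	VF-0 { mac-addr : "00:25:90:aa:bb:cc"; }
	 */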
1587359540Skib#ifdef PCI_IOV
1588359540Skib	if (MLX5_CAP_GEN(dev, vport_group_manager)) {
1589359545Skib		if (pci_find_extcap(bsddev, PCIZ_SRIOV, &sriov_pos) == 0) {
1590359545Skib			num_vfs = pci_read_config(bsddev, sriov_pos +
1591359545Skib			    PCIR_SRIOV_TOTAL_VFS, 2);
1592359545Skib		} else {
1593359548Skib			mlx5_core_info(dev, "cannot find SR-IOV PCIe cap\n");
1594359545Skib			num_vfs = 0;
1595359545Skib		}
1596359545Skib		err = mlx5_eswitch_init(dev, 1 + num_vfs);
1597359544Skib		if (err == 0) {
1598359544Skib			pf_schema = pci_iov_schema_alloc_node();
1599359544Skib			vf_schema = pci_iov_schema_alloc_node();
1600359544Skib			pci_iov_schema_add_unicast_mac(vf_schema,
1601359544Skib			    iov_mac_addr_name, 0, NULL);
1602359544Skib			err = pci_iov_attach(bsddev, pf_schema, vf_schema);
1603359544Skib			if (err != 0) {
1604359544Skib				device_printf(bsddev,
1605359540Skib			    "Failed to initialize SR-IOV support, error %d\n",
1606359544Skib				    err);
1607359544Skib			}
1608359544Skib		} else {
1609359544Skib			mlx5_core_err(dev, "eswitch init failed, error %d\n",
1610359540Skib			    err);
1611359540Skib		}
1612359540Skib	}
1613359540Skib#endif
1614359540Skib
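	/*
	 * Save the PCI configuration space now so that mlx5_pci_slot_reset()
	 * can restore it after an error-triggered reset.
	 */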
1615341930Shselasky	pci_save_state(bsddev);
1616290650Shselasky	return 0;
1617290650Shselasky
1618331580Shselaskyclean_health:
1619331810Shselasky	mlx5_pagealloc_cleanup(dev);
1620341930Shselasky	mlx5_health_cleanup(dev);
1621331580Shselaskyclose_pci:
1622341930Shselasky	mlx5_pci_close(dev, priv);
1623331580Shselaskyclean_dev:
1624353206Shselasky	mtx_destroy(&dev->dump_lock);
1625353206Shselaskyclean_sysctl_ctx:
1626338554Shselasky	sysctl_ctx_free(&dev->sysctl_ctx);
1627290650Shselasky	kfree(dev);
1628290650Shselasky	return err;
1629290650Shselasky}
1630290650Shselasky
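/*
 * Detach: unload the device and tear down the page allocator, health
 * monitor, firmware dump state, PCI resources and the sysctl tree before
 * freeing the softc.  If mlx5_unload_one() fails, only the health state is
 * cleaned up and the rest of the teardown is skipped.
 */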
1631290650Shselaskystatic void remove_one(struct pci_dev *pdev)
1632290650Shselasky{
1633290650Shselasky	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1634331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1635290650Shselasky
1636331810Shselasky	if (mlx5_unload_one(dev, priv, true)) {
1637353224Shselasky		mlx5_core_err(dev, "mlx5_unload_one failed\n");
1638331580Shselasky		mlx5_health_cleanup(dev);
1639331580Shselasky		return;
1640331580Shselasky	}
1641331580Shselasky
1642331810Shselasky	mlx5_pagealloc_cleanup(dev);
1643331580Shselasky	mlx5_health_cleanup(dev);
1644347880Shselasky	mlx5_fwdump_clean(dev);
1645331580Shselasky	mlx5_pci_close(dev, priv);
1646347880Shselasky	mtx_destroy(&dev->dump_lock);
1647331580Shselasky	pci_set_drvdata(pdev, NULL);
1648338554Shselasky	sysctl_ctx_free(&dev->sysctl_ctx);
1649290650Shselasky	kfree(dev);
1650290650Shselasky}
1651290650Shselasky
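/*
 * PCI error recovery flow: mlx5_pci_err_detected() forces the device into
 * its internal error state and unloads it, mlx5_pci_slot_reset() re-enables
 * the function and restores its saved configuration space, and
 * mlx5_pci_resume() waits for the firmware to show vital signs before
 * reloading the device.
 */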
1652331580Shselaskystatic pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
1653331580Shselasky					      pci_channel_state_t state)
1654331580Shselasky{
1655331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1656331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1657331580Shselasky
1658353224Shselasky	mlx5_core_info(dev, "%s was called\n", __func__);
1659331810Shselasky	mlx5_enter_error_state(dev, false);
1660331810Shselasky	mlx5_unload_one(dev, priv, false);
1661331914Shselasky
1662331582Shselasky	if (state) {
1663331582Shselasky		mlx5_drain_health_wq(dev);
1664331582Shselasky		mlx5_pci_disable_device(dev);
1665331582Shselasky	}
1666331582Shselasky
1667331580Shselasky	return state == pci_channel_io_perm_failure ?
1668331580Shselasky		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
1669331580Shselasky}
1670331580Shselasky
1671331580Shselaskystatic pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
1672331580Shselasky{
1673331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1674331580Shselasky	int err = 0;
1675331580Shselasky
1676353224Shselasky	mlx5_core_info(dev,"%s was called\n", __func__);
1677331580Shselasky
1678331580Shselasky	err = mlx5_pci_enable_device(dev);
1679331580Shselasky	if (err) {
1680353224Shselasky		mlx5_core_err(dev, "mlx5_pci_enable_device failed with error code: %d\n"
1681353224Shselasky			,err);
1682331580Shselasky		return PCI_ERS_RESULT_DISCONNECT;
1683331580Shselasky	}
1684331580Shselasky	pci_set_master(pdev);
1685331580Shselasky	pci_set_powerstate(pdev->dev.bsddev, PCI_POWERSTATE_D0);
1686331580Shselasky	pci_restore_state(pdev->dev.bsddev);
1687331816Shselasky	pci_save_state(pdev->dev.bsddev);
1688331580Shselasky
1689331580Shselasky	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
1690331580Shselasky}
1691331580Shselasky
1692331580Shselasky/* Wait for the device to show vital signs after a reset: check that the
1693331580Shselasky * device ID can be read and that the health counter shows a non-zero
1694331580Shselasky * value different from 0xffffffff, polling each for up to 100 * 50 ms.
1695331580Shselasky */
1696331580Shselaskystatic void wait_vital(struct pci_dev *pdev)
1697331580Shselasky{
1698331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1699331580Shselasky	struct mlx5_core_health *health = &dev->priv.health;
1700331580Shselasky	const int niter = 100;
1701331580Shselasky	u32 count;
1702331580Shselasky	u16 did;
1703331580Shselasky	int i;
1704331580Shselasky
1705331580Shselasky	/* Wait for firmware to be ready after reset */
1706331580Shselasky	msleep(1000);
1707331580Shselasky	for (i = 0; i < niter; i++) {
1708331580Shselasky		if (pci_read_config_word(pdev, 2, &did)) {
1709353224Shselasky			mlx5_core_warn(dev, "failed reading config word\n");
1710331580Shselasky			break;
1711331580Shselasky		}
1712331580Shselasky		if (did == pdev->device) {
1713353224Shselasky			mlx5_core_info(dev,
1714353224Shselasky			    "device ID correctly read after %d iterations\n", i);
1715331580Shselasky			break;
1716331580Shselasky		}
1717331580Shselasky		msleep(50);
1718331580Shselasky	}
1719331580Shselasky	if (i == niter)
1720353224Shselasky		mlx5_core_warn(dev, "could not read device ID\n");
1721331580Shselasky
1722331580Shselasky	for (i = 0; i < niter; i++) {
1723331580Shselasky		count = ioread32be(health->health_counter);
1724331580Shselasky		if (count && count != 0xffffffff) {
1725353224Shselasky			mlx5_core_info(dev,
1726353224Shselasky			"Counter value 0x%x after %d iterations\n", count, i);
1727331580Shselasky			break;
1728331580Shselasky		}
1729331580Shselasky		msleep(50);
1730331580Shselasky	}
1731331580Shselasky
1732331580Shselasky	if (i == niter)
1733353224Shselasky		mlx5_core_warn(dev, "could not read device ID\n");
1734331580Shselasky}
1735331580Shselasky
1736331580Shselaskystatic void mlx5_pci_resume(struct pci_dev *pdev)
1737331580Shselasky{
1738331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1739331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1740331580Shselasky	int err;
1741331580Shselasky
1742353224Shselasky	mlx5_core_info(dev,"%s was called\n", __func__);
1743331580Shselasky
1744331580Shselasky	wait_vital(pdev);
1745331580Shselasky
1746331810Shselasky	err = mlx5_load_one(dev, priv, false);
1747331580Shselasky	if (err)
1748353224Shselasky		mlx5_core_err(dev,
1749353224Shselasky		    "mlx5_load_one failed with error code: %d\n" ,err);
1750331580Shselasky	else
1751353224Shselasky		mlx5_core_info(dev,"device recovered\n");
1752331580Shselasky}
1753331580Shselasky
1754331580Shselaskystatic const struct pci_error_handlers mlx5_err_handler = {
1755331580Shselasky	.error_detected = mlx5_pci_err_detected,
1756331580Shselasky	.slot_reset	= mlx5_pci_slot_reset,
1757331580Shselasky	.resume		= mlx5_pci_resume
1758331580Shselasky};
1759331580Shselasky
1760359540Skib#ifdef PCI_IOV
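/*
 * SR-IOV glue for the pci_iov(4) framework.  E-switch vport 0 belongs to
 * the PF, so VF "vfnum" is handled as vport "vfnum + 1" in these callbacks.
 */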
1761359540Skibstatic int
1762359540Skibmlx5_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *pf_config)
1763359540Skib{
1764359540Skib	struct pci_dev *pdev;
1765359540Skib	struct mlx5_core_dev *core_dev;
1766359540Skib	struct mlx5_priv *priv;
1767359544Skib	int err;
1768359540Skib
1769359540Skib	pdev = device_get_softc(dev);
1770359540Skib	core_dev = pci_get_drvdata(pdev);
1771359540Skib	priv = &core_dev->priv;
1772359540Skib
1773359545Skib	if (priv->eswitch == NULL)
1774359545Skib		return (ENXIO);
1775359545Skib	if (priv->eswitch->total_vports < num_vfs + 1)
1776359545Skib		num_vfs = priv->eswitch->total_vports - 1;
1777359544Skib	err = mlx5_eswitch_enable_sriov(priv->eswitch, num_vfs);
1778359544Skib	return (-err);
1779359540Skib}
1780359540Skib
1781359540Skibstatic void
1782359540Skibmlx5_iov_uninit(device_t dev)
1783359540Skib{
1784359540Skib	struct pci_dev *pdev;
1785359540Skib	struct mlx5_core_dev *core_dev;
1786359540Skib	struct mlx5_priv *priv;
1787359540Skib
1788359540Skib	pdev = device_get_softc(dev);
1789359540Skib	core_dev = pci_get_drvdata(pdev);
1790359540Skib	priv = &core_dev->priv;
1791359544Skib
1792359544Skib	mlx5_eswitch_disable_sriov(priv->eswitch);
1793359540Skib}
1794359540Skib
1795359540Skibstatic int
1796359540Skibmlx5_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *vf_config)
1797359540Skib{
1798359540Skib	struct pci_dev *pdev;
1799359540Skib	struct mlx5_core_dev *core_dev;
1800359540Skib	struct mlx5_priv *priv;
1801359544Skib	const void *mac;
1802359544Skib	size_t mac_size;
1803359540Skib	int error;
1804359540Skib
1805359540Skib	pdev = device_get_softc(dev);
1806359540Skib	core_dev = pci_get_drvdata(pdev);
1807359540Skib	priv = &core_dev->priv;
1808359540Skib
1809359545Skib	if (vfnum + 1 >= priv->eswitch->total_vports)
1810359545Skib		return (ENXIO);
1811359545Skib
1812359544Skib	if (nvlist_exists_binary(vf_config, iov_mac_addr_name)) {
1813359544Skib		mac = nvlist_get_binary(vf_config, iov_mac_addr_name,
1814359544Skib		    &mac_size);
1815359544Skib		error = -mlx5_eswitch_set_vport_mac(priv->eswitch,
1816359544Skib		    vfnum + 1, __DECONST(u8 *, mac));
		if (error != 0) {
			mlx5_core_err(core_dev,
			    "setting MAC for VF %d failed, error %d\n",
			    vfnum + 1, error);
		}
1817359544Skib	}
1818359544Skib
1819359544Skib	error = -mlx5_eswitch_set_vport_state(priv->eswitch, vfnum + 1,
1820359544Skib	    VPORT_STATE_FOLLOW);
1821359544Skib	if (error != 0) {
1822359544Skib		mlx5_core_err(core_dev,
1823359544Skib		    "upping vport for VF %d failed, error %d\n",
1824359544Skib		    vfnum + 1, error);
1825359544Skib	}
1826359540Skib	error = -mlx5_core_enable_hca(core_dev, vfnum + 1);
1827359540Skib	if (error != 0) {
1828359540Skib		mlx5_core_err(core_dev, "enabling VF %d failed, error %d\n",
1829359544Skib		    vfnum + 1, error);
1830359540Skib	}
1831359540Skib	return (error);
1832359540Skib}
1833359540Skib#endif
1834359540Skib
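/*
 * Try to shut the firmware down with a single fast/force teardown command
 * instead of the regular resource-by-resource unload.  This is attempted
 * only when the mlx5_fast_unload_enabled tunable allows it, the firmware
 * advertises one of the teardown capabilities and the device is not already
 * in an internal error state; on success the device is moved into the error
 * state so that the caller can skip the full unload.
 */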
1835331810Shselaskystatic int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
1836331810Shselasky{
1837347818Shselasky	bool fast_teardown, force_teardown;
1838331810Shselasky	int err;
1839331810Shselasky
1840347819Shselasky	if (!mlx5_fast_unload_enabled) {
1841347819Shselasky		mlx5_core_dbg(dev, "fast unload is disabled by user\n");
1842347819Shselasky		return -EOPNOTSUPP;
1843347819Shselasky	}
1844347819Shselasky
1845347818Shselasky	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
1846347818Shselasky	force_teardown = MLX5_CAP_GEN(dev, force_teardown);
1847347818Shselasky
1848347818Shselasky	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
1849347818Shselasky	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
1850347818Shselasky
1851347818Shselasky	if (!fast_teardown && !force_teardown)
1852331810Shselasky		return -EOPNOTSUPP;
1853331810Shselasky
1854331810Shselasky	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1855331810Shselasky		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
1856331810Shselasky		return -EAGAIN;
1857331810Shselasky	}
1858331810Shselasky
1859341934Shselasky	/* The panic/teardown firmware command stops PCI bus communication
1860341934Shselasky	 * with the HCA, so the health poll is no longer needed.
1861341934Shselasky	 */
1862341934Shselasky	mlx5_drain_health_wq(dev);
1863341934Shselasky	mlx5_stop_health_poll(dev, false);
1864341934Shselasky
1865347818Shselasky	err = mlx5_cmd_fast_teardown_hca(dev);
1866347818Shselasky	if (!err)
1867347818Shselasky		goto done;
1868347818Shselasky
1869331810Shselasky	err = mlx5_cmd_force_teardown_hca(dev);
1870347818Shselasky	if (!err)
1871347818Shselasky		goto done;
1872331810Shselasky
1873347818Shselasky	mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", err);
1874347818Shselasky	mlx5_start_health_poll(dev);
1875347818Shselasky	return err;
1876347818Shselaskydone:
1877331810Shselasky	mlx5_enter_error_state(dev, true);
1878331810Shselasky	return 0;
1879331810Shselasky}
1880331810Shselasky
1881358015Shselaskystatic void mlx5_shutdown_disable_interrupts(struct mlx5_core_dev *mdev)
1882347802Shselasky{
1883347802Shselasky	int nvec = mdev->priv.eq_table.num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
1884347802Shselasky	int x;
1885347802Shselasky
1886347802Shselasky	mdev->priv.disable_irqs = 1;
1887347802Shselasky
1888347802Shselasky	/* wait for all IRQ handlers to finish processing */
1889347802Shselasky	for (x = 0; x != nvec; x++)
1890347802Shselasky		synchronize_irq(mdev->priv.msix_arr[x].vector);
1891347802Shselasky}
1892347802Shselasky
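/*
 * System shutdown: switch the command interface to polling mode, mark the
 * interface state as tearing down, quiesce all interrupt vectors and then
 * either fast-unload through the firmware or fall back to a regular unload
 * before disabling the PCI device.
 */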
1893329211Shselaskystatic void shutdown_one(struct pci_dev *pdev)
1894329211Shselasky{
1895331580Shselasky	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1896331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1897331810Shselasky	int err;
1898331580Shselasky
1899347802Shselasky	/* enter polling mode */
1900347802Shselasky	mlx5_cmd_use_polling(dev);
1901347802Shselasky
1902355653Skib	set_bit(MLX5_INTERFACE_STATE_TEARDOWN, &dev->intf_state);
1903355653Skib
1904347802Shselasky	/* disable all interrupts */
1905358015Shselasky	mlx5_shutdown_disable_interrupts(dev);
1906347802Shselasky
1907331810Shselasky	err = mlx5_try_fast_unload(dev);
1908331810Shselasky	if (err)
1909331810Shselasky		mlx5_unload_one(dev, priv, false);
1910331580Shselasky	mlx5_pci_disable_device(dev);
1911329211Shselasky}
1912329211Shselasky
1913290650Shselaskystatic const struct pci_device_id mlx5_core_pci_table[] = {
1914290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */
1915290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */
1916290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */
1917290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */
1918290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */
1919290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */
1920306233Shselasky	{ PCI_VDEVICE(MELLANOX, 4119) }, /* ConnectX-5 */
1921306233Shselasky	{ PCI_VDEVICE(MELLANOX, 4120) }, /* ConnectX-5 VF */
1922290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4121) },
1923290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4122) },
1924290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4123) },
1925290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4124) },
1926290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4125) },
1927290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4126) },
1928290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4127) },
1929290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4128) },
1930290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4129) },
1931290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4130) },
1932290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4131) },
1933290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4132) },
1934290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4133) },
1935290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4134) },
1936290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4135) },
1937290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4136) },
1938290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4137) },
1939290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4138) },
1940290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4139) },
1941290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4140) },
1942290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4141) },
1943290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4142) },
1944290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4143) },
1945290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4144) },
1946290650Shselasky	{ 0, }
1947290650Shselasky};
1948290650Shselasky
1949290650ShselaskyMODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
1950290650Shselasky
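/*
 * Force a device down/up cycle from within the driver by reusing the PCI
 * error handler callbacks directly.
 */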
1951331809Shselaskyvoid mlx5_disable_device(struct mlx5_core_dev *dev)
1952331809Shselasky{
1953331809Shselasky	mlx5_pci_err_detected(dev->pdev, 0);
1954331809Shselasky}
1955331809Shselasky
1956331809Shselaskyvoid mlx5_recover_device(struct mlx5_core_dev *dev)
1957331809Shselasky{
1958331809Shselasky	mlx5_pci_disable_device(dev);
1959331809Shselasky	if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
1960331809Shselasky		mlx5_pci_resume(dev->pdev);
1961331809Shselasky}
1962331809Shselasky
1963331586Shselaskystruct pci_driver mlx5_core_driver = {
1964290650Shselasky	.name           = DRIVER_NAME,
1965290650Shselasky	.id_table       = mlx5_core_pci_table,
1966329211Shselasky	.shutdown	= shutdown_one,
1967290650Shselasky	.probe          = init_one,
1968331580Shselasky	.remove         = remove_one,
1969359540Skib	.err_handler	= &mlx5_err_handler,
1970359540Skib#ifdef PCI_IOV
1971359540Skib	.bsd_iov_init	= mlx5_iov_init,
1972359540Skib	.bsd_iov_uninit	= mlx5_iov_uninit,
1973359540Skib	.bsd_iov_add_vf	= mlx5_iov_add_vf,
1974359540Skib#endif
1975290650Shselasky};
1976290650Shselasky
1977290650Shselaskystatic int __init init(void)
1978290650Shselasky{
1979290650Shselasky	int err;
1980290650Shselasky
1981290650Shselasky	err = pci_register_driver(&mlx5_core_driver);
1982290650Shselasky	if (err)
1983331580Shselasky		goto err_debug;
1984290650Shselasky
1985347871Shselasky	err = mlx5_ctl_init();
1986331586Shselasky	if (err)
1987347871Shselasky		goto err_ctl;
1988331586Shselasky
1989331586Shselasky	return 0;
1990331586Shselasky
1991347871Shselaskyerr_ctl:
1992331586Shselasky	pci_unregister_driver(&mlx5_core_driver);
1993290650Shselasky
1994290650Shselaskyerr_debug:
1995290650Shselasky	return err;
1996290650Shselasky}
1997290650Shselasky
1998290650Shselaskystatic void __exit cleanup(void)
1999290650Shselasky{
2000347871Shselasky	mlx5_ctl_fini();
2001290650Shselasky	pci_unregister_driver(&mlx5_core_driver);
2002290650Shselasky}
2003290650Shselasky
2004290650Shselaskymodule_init(init);
2005290650Shselaskymodule_exit(cleanup);
2006