/*-
 * Copyright (c) 2013-2019, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_core/mlx5_main.c 369092 2021-01-22 12:49:39Z hselasky $
 */

#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/interrupt.h>
#include <linux/hardirq.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/cq.h>
#include <dev/mlx5/qp.h>
#include <dev/mlx5/srq.h>
#include <dev/mlx5/mpfs.h>
#include <dev/mlx5/vport.h>
#include <linux/delay.h>
#include <dev/mlx5/mlx5_ifc.h>
#include <dev/mlx5/mlx5_fpga/core.h>
#include <dev/mlx5/mlx5_lib/mlx5.h>
#include "mlx5_core.h"
#include "eswitch.h"
#include "fs_core.h"
#ifdef PCI_IOV
#include <sys/nv.h>
#include <dev/pci/pci_iov.h>
#include <sys/iov_schema.h>
#endif

static const char mlx5_version[] = "Mellanox Core driver "
	DRIVER_VERSION " (" DRIVER_RELDATE ")";
MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_DEPEND(mlx5, linuxkpi, 1, 1, 1);
MODULE_DEPEND(mlx5, mlxfw, 1, 1, 1);
MODULE_DEPEND(mlx5, firmware, 1, 1, 1);
MODULE_VERSION(mlx5, 1);

SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "mlx5 hardware controls");

int mlx5_core_debug_mask;
SYSCTL_INT(_hw_mlx5, OID_AUTO, debug_mask, CTLFLAG_RWTUN,
    &mlx5_core_debug_mask, 0,
    "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");

#define MLX5_DEFAULT_PROF	2
static int mlx5_prof_sel = MLX5_DEFAULT_PROF;
SYSCTL_INT(_hw_mlx5, OID_AUTO, prof_sel, CTLFLAG_RWTUN,
    &mlx5_prof_sel, 0,
    "profile selector. Valid range 0 - 2");

static int mlx5_fast_unload_enabled = 1;
SYSCTL_INT(_hw_mlx5, OID_AUTO, fast_unload_enabled, CTLFLAG_RWTUN,
    &mlx5_fast_unload_enabled, 0,
    "Set to enable fast unload. Clear to disable.");

#define NUMA_NO_NODE       -1

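/*
 * Registered client interfaces and core devices, both protected by
 * intf_mutex; every interface on intf_list is attached to every device
 * on dev_list, regardless of which side registered first.
 */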
static LIST_HEAD(intf_list);
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(intf_mutex);

struct mlx5_device_context {
	struct list_head	list;
	struct mlx5_interface  *intf;
	void		       *context;
};

enum {
	MLX5_ATOMIC_REQ_MODE_BE = 0x0,
	MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1,
};

static struct mlx5_profile profiles[] = {
	[0] = {
		.mask           = 0,
	},
	[1] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 12,
	},
	[2] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE |
				  MLX5_PROF_MASK_MR_CACHE,
		.log_max_qp	= 17,
		.mr_cache[0]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[1]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[2]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[3]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[4]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[5]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[6]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[7]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[8]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[9]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[10]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[11]	= {
			.size	= 500,
			.limit	= 250
		},
		.mr_cache[12]	= {
			.size	= 64,
			.limit	= 32
		},
		.mr_cache[13]	= {
			.size	= 32,
			.limit	= 16
		},
		.mr_cache[14]	= {
			.size	= 16,
			.limit	= 8
		},
	},
	[3] = {
		.mask		= MLX5_PROF_MASK_QP_SIZE,
		.log_max_qp	= 17,
	},
};

#ifdef PCI_IOV
static const char iov_mac_addr_name[] = "mac-addr";
static const char iov_node_guid_name[] = "node-guid";
static const char iov_port_guid_name[] = "port-guid";
#endif

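/*
 * Prefer a 64-bit DMA and coherent DMA mask and fall back to 32-bit when
 * the platform refuses it, then cap the maximum DMA segment size at 2GB.
 */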
static int set_dma_caps(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err;

	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		mlx5_core_warn(dev, "couldn't set 64-bit PCI DMA mask\n");
		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			mlx5_core_err(dev, "Can't set PCI DMA mask, aborting\n");
			return err;
		}
	}

	err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
	if (err) {
		mlx5_core_warn(dev, "couldn't set 64-bit consistent PCI DMA mask\n");
		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		if (err) {
			mlx5_core_err(dev, "Can't set consistent PCI DMA mask, aborting\n");
			return err;
		}
	}

	dma_set_max_seg_size(&pdev->dev, 2u * 1024 * 1024 * 1024);
	return err;
}

int mlx5_pci_read_power_status(struct mlx5_core_dev *dev,
			       u16 *p_power, u8 *p_status)
{
	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {};
	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {};
	int err;

	err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
	    MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN, 0, 0);

	*p_status = MLX5_GET(mpein_reg, out, pwr_status);
	*p_power = MLX5_GET(mpein_reg, out, pci_power);
	return err;
}

static int mlx5_pci_enable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;
	int err = 0;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_DISABLED) {
		err = pci_enable_device(pdev);
		if (!err)
			dev->pci_status = MLX5_PCI_STATUS_ENABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);

	return err;
}

static void mlx5_pci_disable_device(struct mlx5_core_dev *dev)
{
	struct pci_dev *pdev = dev->pdev;

	mutex_lock(&dev->pci_status_mutex);
	if (dev->pci_status == MLX5_PCI_STATUS_ENABLED) {
		pci_disable_device(pdev);
		dev->pci_status = MLX5_PCI_STATUS_DISABLED;
	}
	mutex_unlock(&dev->pci_status_mutex);
}

static int request_bar(struct pci_dev *pdev)
{
	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
	int err = 0;

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
		mlx5_core_err(dev, "Missing registers BAR, aborting\n");
		return -ENODEV;
	}

	err = pci_request_regions(pdev, DRIVER_NAME);
	if (err)
		mlx5_core_err(dev, "Couldn't get PCI resources, aborting\n");

	return err;
}

static void release_bar(struct pci_dev *pdev)
{
	pci_release_regions(pdev);
}

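/*
 * Allocate MSI-X vectors: MLX5_EQ_VEC_COMP_BASE vectors are reserved for
 * the non-completion EQs, plus either the "msix_eqvec" tunable (if set) or
 * one completion vector per port per online CPU, clamped to the number of
 * EQs the device supports and to the firmware limit of 256 vectors.
 */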
static int mlx5_enable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_eq_table *table = &priv->eq_table;
	int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
	int limit = dev->msix_eqvec;
	int nvec = MLX5_EQ_VEC_COMP_BASE;
	int i;

	if (limit > 0)
		nvec += limit;
	else
		nvec += MLX5_CAP_GEN(dev, num_ports) * num_online_cpus();

	if (nvec > num_eqs)
		nvec = num_eqs;
	if (nvec > 256)
		nvec = 256;	/* limit of firmware API */
	if (nvec <= MLX5_EQ_VEC_COMP_BASE)
		return -ENOMEM;

	priv->msix_arr = kzalloc(nvec * sizeof(*priv->msix_arr), GFP_KERNEL);

	for (i = 0; i < nvec; i++)
		priv->msix_arr[i].entry = i;

	nvec = pci_enable_msix_range(dev->pdev, priv->msix_arr,
				     MLX5_EQ_VEC_COMP_BASE + 1, nvec);
	if (nvec < 0)
		return nvec;

	table->num_comp_vectors = nvec - MLX5_EQ_VEC_COMP_BASE;
	return 0;
}

static void mlx5_disable_msix(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;

	pci_disable_msix(dev->pdev);
	kfree(priv->msix_arr);
}

struct mlx5_reg_host_endianess {
	u8	he;
	u8      rsvd[15];
};


#define CAP_MASK(pos, size) ((u64)((1 << (size)) - 1) << (pos))

enum {
	MLX5_CAP_BITS_RW_MASK = CAP_MASK(MLX5_CAP_OFF_CMDIF_CSUM, 2) |
				MLX5_DEV_CAP_FLAG_DCT |
				MLX5_DEV_CAP_FLAG_DRAIN_SIGERR,
};

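/*
 * Convert a P_Key table size in entries into the firmware encoding,
 * log2(size / 128); unsupported sizes warn and fall back to encoding 0.
 */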
static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size)
{
	switch (size) {
	case 128:
		return 0;
	case 256:
		return 1;
	case 512:
		return 2;
	case 1024:
		return 3;
	case 2048:
		return 4;
	case 4096:
		return 5;
	default:
		mlx5_core_warn(dev, "invalid pkey table size %d\n", size);
		return 0;
	}
}

static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
				   enum mlx5_cap_type cap_type,
				   enum mlx5_cap_mode cap_mode)
{
	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	void *out, *hca_caps;
	u16 opmod = (cap_type << 1) | (cap_mode & 0x01);
	int err;

	memset(in, 0, sizeof(in));
	out = kzalloc(out_sz, GFP_KERNEL);

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
	if (err) {
		mlx5_core_warn(dev,
			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
			       cap_type, cap_mode, err);
		goto query_ex;
	}

	hca_caps =  MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	switch (cap_mode) {
	case HCA_CAP_OPMOD_GET_MAX:
		memcpy(dev->hca_caps_max[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	case HCA_CAP_OPMOD_GET_CUR:
		memcpy(dev->hca_caps_cur[cap_type], hca_caps,
		       MLX5_UN_SZ_BYTES(hca_cap_union));
		break;
	default:
		mlx5_core_warn(dev,
			       "Tried to query dev cap type(%x) with wrong opmode(%x)\n",
			       cap_type, cap_mode);
		err = -EINVAL;
		break;
	}
query_ex:
	kfree(out);
	return err;
}

int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
{
	int ret;

	ret = mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_CUR);
	if (ret)
		return ret;

	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
}

static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz)
{
	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};

	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);

	return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
}

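/*
 * Query the current general HCA capabilities and write back an adjusted
 * copy: the P_Key table is limited to 128 entries, the command-interface
 * checksum is disabled, drain-on-signature-error is enabled, the UAR page
 * size is set to the host page size, and log_max_qp is taken from the
 * selected profile when requested.
 */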
static int handle_hca_cap(struct mlx5_core_dev *dev)
{
	void *set_ctx = NULL;
	struct mlx5_profile *prof = dev->profile;
	int err = -ENOMEM;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	void *set_hca_cap;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);

	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
	if (err)
		goto query_ex;

	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
				   capability);
	memcpy(set_hca_cap, dev->hca_caps_cur[MLX5_CAP_GENERAL],
	       MLX5_ST_SZ_BYTES(cmd_hca_cap));

	mlx5_core_dbg(dev, "Current Pkey table size %d Setting new size %d\n",
		      mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(dev, pkey_table_size)),
		      128);
	/* we limit the size of the pkey table to 128 entries for now */
	MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size,
		 to_fw_pkey_sz(dev, 128));

	if (prof->mask & MLX5_PROF_MASK_QP_SIZE)
		MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp,
			 prof->log_max_qp);

	/* disable cmdif checksum */
	MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);

	/* enable drain sigerr */
	MLX5_SET(cmd_hca_cap, set_hca_cap, drain_sigerr, 1);

	MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);

	err = set_caps(dev, set_ctx, set_sz);

query_ex:
	kfree(set_ctx);
	return err;
}

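/*
 * If the device exposes atomic capabilities and supports host-endianness
 * mode for 8-byte atomic requestors, switch the requestor endianness from
 * the big-endian default to host endianness.
 */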
static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
{
	void *set_ctx;
	void *set_hca_cap;
	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
	int req_endianness;
	int err;

	if (MLX5_CAP_GEN(dev, atomic)) {
		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
		if (err)
			return err;
	} else {
		return 0;
	}

	req_endianness =
		MLX5_CAP_ATOMIC(dev,
				supported_atomic_req_8B_endianess_mode_1);

	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
		return 0;

	set_ctx = kzalloc(set_sz, GFP_KERNEL);
	if (!set_ctx)
		return -ENOMEM;

	MLX5_SET(set_hca_cap_in, set_ctx, op_mod,
		 MLX5_SET_HCA_CAP_OP_MOD_ATOMIC << 1);
	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);

	/* Set requestor to host endianness */
	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianess_mode,
		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);

	err = set_caps(dev, set_ctx, set_sz);

	kfree(set_ctx);
	return err;
}

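/*
 * Report the host endianness to firmware through the HOST_ENDIANNESS
 * access register; Ethernet-only devices without RoCE skip this step.
 */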
static int set_hca_ctrl(struct mlx5_core_dev *dev)
{
	struct mlx5_reg_host_endianess he_in;
	struct mlx5_reg_host_endianess he_out;
	int err;

	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
	    !MLX5_CAP_GEN(dev, roce))
		return 0;

	memset(&he_in, 0, sizeof(he_in));
	he_in.he = MLX5_SET_HOST_ENDIANNESS;
	err = mlx5_core_access_reg(dev, &he_in,  sizeof(he_in),
					&he_out, sizeof(he_out),
					MLX5_REG_HOST_ENDIANNESS, 0, 1);
	return err;
}

static int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
{
	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0};

	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
	MLX5_SET(enable_hca_in, in, function_id, func_id);
	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
}

static int mlx5_core_disable_hca(struct mlx5_core_dev *dev)
{
	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0};

	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

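/*
 * Negotiate the ISSI (Interface Step Sequence ID) with the firmware:
 * query the supported set, prefer ISSI 1 and fall back to ISSI 0 when
 * only the original command layout is available.
 */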
static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
{
	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0};
	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
	u32 sup_issi;
	int err;

	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);

	err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), query_out, sizeof(query_out));
	if (err) {
		u32 syndrome;
		u8 status;

		mlx5_cmd_mbox_status(query_out, &status, &syndrome);
		if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
			mlx5_core_dbg(dev, "Only ISSI 0 is supported\n");
			return 0;
		}

		mlx5_core_err(dev, "failed to query ISSI\n");
		return err;
	}

	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);

	if (sup_issi & (1 << 1)) {
		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]	 = {0};
		u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};

		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
		MLX5_SET(set_issi_in, set_in, current_issi, 1);

		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), set_out, sizeof(set_out));
		if (err) {
			mlx5_core_err(dev, "failed to set ISSI=1 err(%d)\n", err);
			return err;
		}

		dev->issi = 1;

		return 0;
	} else if (sup_issi & (1 << 0)) {
		return 0;
	}

	return -ENOTSUPP;
}


int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, int *irqn)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq;
	int err = -ENOENT;

	spin_lock(&table->lock);
	list_for_each_entry(eq, &table->comp_eqs_list, list) {
		if (eq->index == vector) {
			*eqn = eq->eqn;
			*irqn = eq->irqn;
			err = 0;
			break;
		}
	}
	spin_unlock(&table->lock);

	return err;
}
EXPORT_SYMBOL(mlx5_vector2eqn);

static void free_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq, *n;

	spin_lock(&table->lock);
	list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) {
		list_del(&eq->list);
		spin_unlock(&table->lock);
		if (mlx5_destroy_unmap_eq(dev, eq))
			mlx5_core_warn(dev, "failed to destroy EQ 0x%x\n",
				       eq->eqn);
		kfree(eq);
		spin_lock(&table->lock);
	}
	spin_unlock(&table->lock);
}

static int alloc_comp_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq;
	int ncomp_vec;
	int nent;
	int err;
	int i;

	INIT_LIST_HEAD(&table->comp_eqs_list);
	ncomp_vec = table->num_comp_vectors;
	nent = MLX5_COMP_EQ_SIZE;
	for (i = 0; i < ncomp_vec; i++) {
		eq = kzalloc(sizeof(*eq), GFP_KERNEL);

		err = mlx5_create_map_eq(dev, eq,
					 i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
					 &dev->priv.uuari.uars[0]);
		if (err) {
			kfree(eq);
			goto clean;
		}
		mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->eqn);
		eq->index = i;
		spin_lock(&table->lock);
		list_add_tail(&eq->list, &table->comp_eqs_list);
		spin_unlock(&table->lock);
	}

	return 0;

clean:
	free_comp_eqs(dev);
	return err;
}

static int map_bf_area(struct mlx5_core_dev *dev)
{
	resource_size_t bf_start = pci_resource_start(dev->pdev, 0);
	resource_size_t bf_len = pci_resource_len(dev->pdev, 0);

	dev->priv.bf_mapping = io_mapping_create_wc(bf_start, bf_len);

	return dev->priv.bf_mapping ? 0 : -ENOMEM;
}

static void unmap_bf_area(struct mlx5_core_dev *dev)
{
	if (dev->priv.bf_mapping)
		io_mapping_free(dev->priv.bf_mapping);
}

static inline int fw_initializing(struct mlx5_core_dev *dev)
{
	return ioread32be(&dev->iseg->initializing) >> 31;
}

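/*
 * Poll the "initializing" bit in the firmware initialization segment until
 * it clears, warning every warn_time_mili milliseconds (when non-zero) and
 * giving up with -EBUSY after max_wait_mili milliseconds.
 */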
static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
			u32 warn_time_mili)
{
	int warn = jiffies + msecs_to_jiffies(warn_time_mili);
	int end = jiffies + msecs_to_jiffies(max_wait_mili);
	int err = 0;

	MPASS(max_wait_mili > warn_time_mili);

	while (fw_initializing(dev) == 1) {
		if (time_after(jiffies, end)) {
			err = -EBUSY;
			break;
		}
		if (warn_time_mili && time_after(jiffies, warn)) {
			mlx5_core_warn(dev,
			    "Waiting for FW initialization, timeout abort in %u s\n",
			    (unsigned)(jiffies_to_msecs(end - warn) / 1000));
			warn = jiffies + msecs_to_jiffies(warn_time_mili);
		}
		msleep(FW_INIT_WAIT_MS);
	}

	if (err != 0)
		mlx5_core_dbg(dev, "Full initializing bit dword = 0x%x\n",
		    ioread32be(&dev->iseg->initializing));

	return err;
}

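/*
 * Per-device client context handling: mlx5_add_device() calls the
 * interface's add() callback for one core device and records the returned
 * context on priv->ctx_list; mlx5_remove_device() undoes it.  These are
 * invoked under intf_mutex whenever a device or an interface registers.
 */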
static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL);
	if (!dev_ctx)
		return;

	dev_ctx->intf    = intf;
	CURVNET_SET_QUIET(vnet0);
	dev_ctx->context = intf->add(dev);
	CURVNET_RESTORE();

	if (dev_ctx->context) {
		spin_lock_irq(&priv->ctx_lock);
		list_add_tail(&dev_ctx->list, &priv->ctx_list);
		spin_unlock_irq(&priv->ctx_lock);
	} else {
		kfree(dev_ctx);
	}
}

static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
{
	struct mlx5_device_context *dev_ctx;
	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf == intf) {
			spin_lock_irq(&priv->ctx_lock);
			list_del(&dev_ctx->list);
			spin_unlock_irq(&priv->ctx_lock);

			intf->remove(dev, dev_ctx->context);
			kfree(dev_ctx);
			return;
		}
}

int
mlx5_register_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_add_tail(&priv->dev_list, &dev_list);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}

void
mlx5_unregister_device(struct mlx5_core_dev *dev)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_interface *intf;

	mutex_lock(&intf_mutex);
	list_for_each_entry(intf, &intf_list, list)
		mlx5_remove_device(intf, priv);
	list_del(&priv->dev_list);
	mutex_unlock(&intf_mutex);
}

int mlx5_register_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	if (!intf->add || !intf->remove)
		return -EINVAL;

	mutex_lock(&intf_mutex);
	list_add_tail(&intf->list, &intf_list);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_add_device(intf, priv);
	mutex_unlock(&intf_mutex);

	return 0;
}
EXPORT_SYMBOL(mlx5_register_interface);

void mlx5_unregister_interface(struct mlx5_interface *intf)
{
	struct mlx5_priv *priv;

	mutex_lock(&intf_mutex);
	list_for_each_entry(priv, &dev_list, dev_list)
		mlx5_remove_device(intf, priv);
	list_del(&intf->list);
	mutex_unlock(&intf_mutex);
}
EXPORT_SYMBOL(mlx5_unregister_interface);

void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol)
{
	struct mlx5_priv *priv = &mdev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;
	void *result = NULL;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list)
		if ((dev_ctx->intf->protocol == protocol) &&
		    dev_ctx->intf->get_dev) {
			result = dev_ctx->intf->get_dev(dev_ctx->context);
			break;
		}

	spin_unlock_irqrestore(&priv->ctx_lock, flags);

	return result;
}
EXPORT_SYMBOL(mlx5_get_protocol_dev);

static int mlx5_auto_fw_update;
SYSCTL_INT(_hw_mlx5, OID_AUTO, auto_fw_update, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
    &mlx5_auto_fw_update, 0,
    "Allow automatic firmware update on driver start");
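
/*
 * When the hw.mlx5.auto_fw_update tunable is set, attempt to flash the
 * "mlx5fw_mfa" image provided through the firmware(9) subsystem at
 * driver start; without the tunable this is a no-op.
 */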
static int
mlx5_firmware_update(struct mlx5_core_dev *dev)
{
	const struct firmware *fw;
	int err;

	TUNABLE_INT_FETCH("hw.mlx5.auto_fw_update", &mlx5_auto_fw_update);
	if (!mlx5_auto_fw_update)
		return (0);
	fw = firmware_get("mlx5fw_mfa");
	if (fw) {
		err = mlx5_firmware_flash(dev, fw);
		firmware_put(fw, FIRMWARE_UNLOAD);
	} else
		return (-ENOENT);

	return err;
}

static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	struct pci_dev *pdev = dev->pdev;
	device_t bsddev;
	int err;

	pdev = dev->pdev;
	bsddev = pdev->dev.bsddev;
	pci_set_drvdata(dev->pdev, dev);
	strncpy(priv->name, dev_name(&pdev->dev), MLX5_MAX_NAME_LEN);
	priv->name[MLX5_MAX_NAME_LEN - 1] = 0;

	mutex_init(&priv->pgdir_mutex);
	INIT_LIST_HEAD(&priv->pgdir_list);
	spin_lock_init(&priv->mkey_lock);

	priv->numa_node = NUMA_NO_NODE;

	err = mlx5_pci_enable_device(dev);
	if (err) {
		mlx5_core_err(dev, "Cannot enable PCI device, aborting\n");
		goto err_dbg;
	}

	err = request_bar(pdev);
	if (err) {
		mlx5_core_err(dev, "error requesting BARs, aborting\n");
		goto err_disable;
	}

	pci_set_master(pdev);

	err = set_dma_caps(pdev);
	if (err) {
		mlx5_core_err(dev, "Failed setting DMA capabilities mask, aborting\n");
		goto err_clr_master;
	}

	dev->iseg_base = pci_resource_start(dev->pdev, 0);
	dev->iseg = ioremap(dev->iseg_base, sizeof(*dev->iseg));
	if (!dev->iseg) {
		err = -ENOMEM;
		mlx5_core_err(dev, "Failed mapping initialization segment, aborting\n");
		goto err_clr_master;
	}

	return 0;

err_clr_master:
	release_bar(dev->pdev);
err_disable:
	mlx5_pci_disable_device(dev);
err_dbg:
	return err;
}

static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
#ifdef PCI_IOV
	if (MLX5_CAP_GEN(dev, eswitch_flow_table))
		pci_iov_detach(dev->pdev->dev.bsddev);
#endif
	iounmap(dev->iseg);
	release_bar(dev->pdev);
	mlx5_pci_disable_device(dev);
}

static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
{
	int err;

	err = mlx5_vsc_find_cap(dev);
	if (err)
		mlx5_core_warn(dev, "Unable to find vendor specific capabilities\n");

	err = mlx5_query_hca_caps(dev);
	if (err) {
		mlx5_core_err(dev, "query hca failed\n");
		goto out;
	}

	err = mlx5_query_board_id(dev);
	if (err) {
		mlx5_core_err(dev, "query board id failed\n");
		goto out;
	}

	err = mlx5_eq_init(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize eq\n");
		goto out;
	}

	MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);

	err = mlx5_init_cq_table(dev);
	if (err) {
		mlx5_core_err(dev, "failed to initialize cq table\n");
		goto err_eq_cleanup;
	}

	mlx5_init_qp_table(dev);
	mlx5_init_srq_table(dev);
	mlx5_init_mr_table(dev);

	mlx5_init_reserved_gids(dev);
	mlx5_fpga_init(dev);

	return 0;

err_eq_cleanup:
	mlx5_eq_cleanup(dev);

out:
	return err;
}

static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
{
	mlx5_fpga_cleanup(dev);
	mlx5_cleanup_reserved_gids(dev);
	mlx5_cleanup_mr_table(dev);
	mlx5_cleanup_srq_table(dev);
	mlx5_cleanup_qp_table(dev);
	mlx5_cleanup_cq_table(dev);
	mlx5_eq_cleanup(dev);
}

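/*
 * Bring the device up: wait for firmware, initialize the command interface,
 * enable the HCA, negotiate ISSI, hand out boot and init pages, apply HCA
 * capabilities, create EQs and UARs, and set up flow steering, MPFS and the
 * FPGA before registering the device with the client interfaces.  On the
 * first call (boot == true) the software object tables are initialized via
 * mlx5_init_once() as well.
 */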
static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			 bool boot)
{
	int err;

	mutex_lock(&dev->intf_state_mutex);
	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "interface is up, NOP\n");
		goto out;
	}

	mlx5_core_dbg(dev, "firmware version: %d.%d.%d\n",
	    fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));

	/*
	 * On load, remove any previous indication of internal error;
	 * the device is up.
	 */
	dev->state = MLX5_DEVICE_STATE_UP;

	/* wait for firmware to accept initialization segments configurations */
	err = wait_fw_init(dev, FW_PRE_INIT_TIMEOUT_MILI,
	    FW_INIT_WARN_MESSAGE_INTERVAL);
	if (err) {
		dev_err(&dev->pdev->dev,
		    "Firmware over %d MS in pre-initializing state, aborting\n",
		    FW_PRE_INIT_TIMEOUT_MILI);
		goto out_err;
	}

	err = mlx5_cmd_init(dev);
	if (err) {
		mlx5_core_err(dev,
		    "Failed initializing command interface, aborting\n");
		goto out_err;
	}

	err = wait_fw_init(dev, FW_INIT_TIMEOUT_MILI, 0);
	if (err) {
		mlx5_core_err(dev,
		    "Firmware over %d MS in initializing state, aborting\n",
		    FW_INIT_TIMEOUT_MILI);
		goto err_cmd_cleanup;
	}

	err = mlx5_core_enable_hca(dev, 0);
	if (err) {
		mlx5_core_err(dev, "enable hca failed\n");
		goto err_cmd_cleanup;
	}

	err = mlx5_core_set_issi(dev);
	if (err) {
		mlx5_core_err(dev, "failed to set issi\n");
		goto err_disable_hca;
	}

	err = mlx5_pagealloc_start(dev);
	if (err) {
		mlx5_core_err(dev, "mlx5_pagealloc_start failed\n");
		goto err_disable_hca;
	}

	err = mlx5_satisfy_startup_pages(dev, 1);
	if (err) {
		mlx5_core_err(dev, "failed to allocate boot pages\n");
		goto err_pagealloc_stop;
	}

	err = set_hca_ctrl(dev);
	if (err) {
		mlx5_core_err(dev, "set_hca_ctrl failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap(dev);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap failed\n");
		goto reclaim_boot_pages;
	}

	err = handle_hca_cap_atomic(dev);
	if (err) {
		mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_satisfy_startup_pages(dev, 0);
	if (err) {
		mlx5_core_err(dev, "failed to allocate init pages\n");
		goto reclaim_boot_pages;
	}

	err = mlx5_cmd_init_hca(dev);
	if (err) {
		mlx5_core_err(dev, "init hca failed\n");
		goto reclaim_boot_pages;
	}

	mlx5_start_health_poll(dev);

	if (boot && mlx5_init_once(dev, priv)) {
		mlx5_core_err(dev, "sw objs init failed\n");
		goto err_stop_poll;
	}

	err = mlx5_enable_msix(dev);
	if (err) {
		mlx5_core_err(dev, "enable msix failed\n");
		goto err_cleanup_once;
	}

	err = mlx5_alloc_uuars(dev, &priv->uuari);
	if (err) {
		mlx5_core_err(dev, "Failed allocating uar, aborting\n");
		goto err_disable_msix;
	}

	err = mlx5_start_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to start pages and async EQs\n");
		goto err_free_uar;
	}

	err = alloc_comp_eqs(dev);
	if (err) {
		mlx5_core_err(dev, "Failed to alloc completion EQs\n");
		goto err_stop_eqs;
	}

	if (map_bf_area(dev))
		mlx5_core_err(dev, "Failed to map blue flame area\n");

	err = mlx5_init_fs(dev);
	if (err) {
		mlx5_core_err(dev, "flow steering init %d\n", err);
		goto err_free_comp_eqs;
	}

	err = mlx5_mpfs_init(dev);
	if (err) {
		mlx5_core_err(dev, "mpfs init failed %d\n", err);
		goto err_fs;
	}

	err = mlx5_fpga_device_start(dev);
	if (err) {
		mlx5_core_err(dev, "fpga device start failed %d\n", err);
		goto err_mpfs;
	}

	err = mlx5_register_device(dev);
	if (err) {
		mlx5_core_err(dev, "mlx5_register_device failed %d\n", err);
		goto err_fpga;
	}

	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);

out:
	mutex_unlock(&dev->intf_state_mutex);
	return 0;

err_fpga:
	mlx5_fpga_device_stop(dev);

err_mpfs:
	mlx5_mpfs_destroy(dev);

err_fs:
	mlx5_cleanup_fs(dev);

err_free_comp_eqs:
	free_comp_eqs(dev);
	unmap_bf_area(dev);

err_stop_eqs:
	mlx5_stop_eqs(dev);

err_free_uar:
	mlx5_free_uuars(dev, &priv->uuari);

err_disable_msix:
	mlx5_disable_msix(dev);

err_cleanup_once:
	if (boot)
		mlx5_cleanup_once(dev);

err_stop_poll:
	mlx5_stop_health_poll(dev, boot);
	if (mlx5_cmd_teardown_hca(dev)) {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
		goto out_err;
	}

reclaim_boot_pages:
	mlx5_reclaim_startup_pages(dev);

err_pagealloc_stop:
	mlx5_pagealloc_stop(dev);

err_disable_hca:
	mlx5_core_disable_hca(dev);

err_cmd_cleanup:
	mlx5_cmd_cleanup(dev);

out_err:
	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
	mutex_unlock(&dev->intf_state_mutex);

	return err;
}

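/*
 * Tear the device down in roughly the reverse order of mlx5_load_one();
 * when "cleanup" is set the software object tables created by
 * mlx5_init_once() are destroyed too (full unload rather than a
 * reset/recovery cycle).
 */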
static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
			   bool cleanup)
{
	int err = 0;

	if (cleanup)
		mlx5_drain_health_recovery(dev);

	mutex_lock(&dev->intf_state_mutex);
	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
		mlx5_core_warn(dev, "%s: interface is down, NOP\n", __func__);
		if (cleanup)
			mlx5_cleanup_once(dev);
		goto out;
	}

	mlx5_unregister_device(dev);

	mlx5_eswitch_cleanup(dev->priv.eswitch);
	mlx5_fpga_device_stop(dev);
	mlx5_mpfs_destroy(dev);
	mlx5_cleanup_fs(dev);
	unmap_bf_area(dev);
	mlx5_wait_for_reclaim_vfs_pages(dev);
	free_comp_eqs(dev);
	mlx5_stop_eqs(dev);
	mlx5_free_uuars(dev, &priv->uuari);
	mlx5_disable_msix(dev);
	if (cleanup)
		mlx5_cleanup_once(dev);
	mlx5_stop_health_poll(dev, cleanup);
	err = mlx5_cmd_teardown_hca(dev);
	if (err) {
		mlx5_core_err(dev, "tear_down_hca failed, skip cleanup\n");
		goto out;
	}
	mlx5_pagealloc_stop(dev);
	mlx5_reclaim_startup_pages(dev);
	mlx5_core_disable_hca(dev);
	mlx5_cmd_cleanup(dev);

out:
	clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
	mutex_unlock(&dev->intf_state_mutex);
	return err;
}

void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
		     unsigned long param)
{
	struct mlx5_priv *priv = &dev->priv;
	struct mlx5_device_context *dev_ctx;
	unsigned long flags;

	spin_lock_irqsave(&priv->ctx_lock, flags);

	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
		if (dev_ctx->intf->event)
			dev_ctx->intf->event(dev, dev_ctx->context, event, param);

	spin_unlock_irqrestore(&priv->ctx_lock, flags);
}

struct mlx5_core_event_handler {
	void (*event)(struct mlx5_core_dev *dev,
		      enum mlx5_dev_event event,
		      void *data);
};

#define	MLX5_STATS_DESC(a, b, c, d, e, ...) d, e,

#define	MLX5_PORT_MODULE_ERROR_STATS(m)				\
m(+1, u64, power_budget_exceeded, "power_budget", "Module Power Budget Exceeded") \
m(+1, u64, long_range, "long_range", "Module Long Range for non MLNX cable/module") \
m(+1, u64, bus_stuck, "bus_stuck", "Module Bus stuck (I2C or data shorted)") \
m(+1, u64, no_eeprom, "no_eeprom", "No EEPROM/retry timeout") \
m(+1, u64, enforce_part_number, "enforce_part_number", "Module Enforce part number list") \
m(+1, u64, unknown_id, "unknown_id", "Module Unknown identifier") \
m(+1, u64, high_temp, "high_temp", "Module High Temperature") \
m(+1, u64, cable_shorted, "cable_shorted", "Module Cable is shorted") \
m(+1, u64, pmd_type_not_enabled, "pmd_type_not_enabled", "PMD type is not enabled") \
m(+1, u64, laster_tec_failure, "laster_tec_failure", "Laser TEC failure") \
m(+1, u64, high_current, "high_current", "High current") \
m(+1, u64, high_voltage, "high_voltage", "High voltage") \
m(+1, u64, pcie_sys_power_slot_exceeded, "pcie_sys_power_slot_exceeded", "PCIe system power slot Exceeded") \
m(+1, u64, high_power, "high_power", "High power")			\
m(+1, u64, module_state_machine_fault, "module_state_machine_fault", "Module State Machine fault")

static const char *mlx5_pme_err_desc[] = {
	MLX5_PORT_MODULE_ERROR_STATS(MLX5_STATS_DESC)
};

1302290650Shselaskystatic int init_one(struct pci_dev *pdev,
1303290650Shselasky		    const struct pci_device_id *id)
1304290650Shselasky{
1305290650Shselasky	struct mlx5_core_dev *dev;
1306290650Shselasky	struct mlx5_priv *priv;
1307338554Shselasky	device_t bsddev = pdev->dev.bsddev;
1308359540Skib#ifdef PCI_IOV
1309359540Skib	nvlist_t *pf_schema, *vf_schema;
1310359545Skib	int num_vfs, sriov_pos;
1311359540Skib#endif
1312353206Shselasky	int i,err;
1313353206Shselasky	struct sysctl_oid *pme_sysctl_node;
1314353206Shselasky	struct sysctl_oid *pme_err_sysctl_node;
1315359529Skib	struct sysctl_oid *cap_sysctl_node;
1316359529Skib	struct sysctl_oid *current_cap_sysctl_node;
1317359529Skib	struct sysctl_oid *max_cap_sysctl_node;
1318290650Shselasky
1319290650Shselasky	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
1320290650Shselasky	priv = &dev->priv;
1321306233Shselasky	if (id)
1322306233Shselasky		priv->pci_dev_data = id->driver_data;
1323290650Shselasky
1324347835Shselasky	if (mlx5_prof_sel < 0 || mlx5_prof_sel >= ARRAY_SIZE(profiles)) {
1325353224Shselasky		device_printf(bsddev,
1326353224Shselasky		    "WARN: selected profile out of range, selecting default (%d)\n",
1327353224Shselasky		    MLX5_DEFAULT_PROF);
1328347835Shselasky		mlx5_prof_sel = MLX5_DEFAULT_PROF;
1329290650Shselasky	}
1330347835Shselasky	dev->profile = &profiles[mlx5_prof_sel];
1331331580Shselasky	dev->pdev = pdev;
1332290650Shselasky	dev->event = mlx5_core_event;
1333290650Shselasky
1334341948Shselasky	/* Set desc */
1335341948Shselasky	device_set_desc(bsddev, mlx5_version);
1336341948Shselasky
1337338554Shselasky	sysctl_ctx_init(&dev->sysctl_ctx);
1338338554Shselasky	SYSCTL_ADD_INT(&dev->sysctl_ctx,
1339338554Shselasky	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1340338554Shselasky	    OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0,
1341338554Shselasky	    "Maximum number of MSIX event queue vectors, if set");
1342347862Shselasky	SYSCTL_ADD_INT(&dev->sysctl_ctx,
1343347862Shselasky	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1344347862Shselasky	    OID_AUTO, "power_status", CTLFLAG_RD, &dev->pwr_status, 0,
1345347862Shselasky	    "0:Invalid 1:Sufficient 2:Insufficient");
1346347862Shselasky	SYSCTL_ADD_INT(&dev->sysctl_ctx,
1347347862Shselasky	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1348347862Shselasky	    OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0,
1349347862Shselasky	    "Current power value in Watts");
1350338554Shselasky
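	/*
	 * Port module event (PME) statistics: a "pme_stats" sysctl node
	 * with plug/unplug counters and an "errors" sub-node holding one
	 * counter per module error type listed in mlx5_pme_err_desc[]
	 * (typically visible under dev.mlx5_core.<unit>.pme_stats).
	 */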
1351353206Shselasky	pme_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1352353206Shselasky	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1353353206Shselasky	    OID_AUTO, "pme_stats", CTLFLAG_RD, NULL,
1354353206Shselasky	    "Port module event statistics");
1355353206Shselasky	if (pme_sysctl_node == NULL) {
1356353206Shselasky		err = -ENOMEM;
1357353206Shselasky		goto clean_sysctl_ctx;
1358353206Shselasky	}
1359353206Shselasky	pme_err_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1360353206Shselasky	    SYSCTL_CHILDREN(pme_sysctl_node),
1361353206Shselasky	    OID_AUTO, "errors", CTLFLAG_RD, NULL,
1362353206Shselasky	    "Port module event error statistics");
1363353206Shselasky	if (pme_err_sysctl_node == NULL) {
1364353206Shselasky		err = -ENOMEM;
1365353206Shselasky		goto clean_sysctl_ctx;
1366353206Shselasky	}
1367353206Shselasky	SYSCTL_ADD_U64(&dev->sysctl_ctx,
1368353206Shselasky	    SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
1369353206Shselasky	    "module_plug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1370353206Shselasky	    &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_PLUGGED_ENABLED],
1371353206Shselasky	    0, "Number of times module plugged");
1372353206Shselasky	SYSCTL_ADD_U64(&dev->sysctl_ctx,
1373353206Shselasky	    SYSCTL_CHILDREN(pme_sysctl_node), OID_AUTO,
1374353206Shselasky	    "module_unplug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1375353206Shselasky	    &dev->priv.pme_stats.status_counters[MLX5_MODULE_STATUS_UNPLUGGED],
1376353206Shselasky	    0, "Number of times module unplugged");
1377353206Shselasky	for (i = 0; i < MLX5_MODULE_EVENT_ERROR_NUM; i++) {
1378353206Shselasky		SYSCTL_ADD_U64(&dev->sysctl_ctx,
1379353206Shselasky		    SYSCTL_CHILDREN(pme_err_sysctl_node), OID_AUTO,
1380353206Shselasky		    mlx5_pme_err_desc[2 * i], CTLFLAG_RD | CTLFLAG_MPSAFE,
1381353206Shselasky		    &dev->priv.pme_stats.error_counters[i],
1382353206Shselasky		    0, mlx5_pme_err_desc[2 * i + 1]);
1383353206Shselasky	}
1384353206Shselasky
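	/*
	 * Export the raw HCA capability bitstrings under a "caps" sysctl
	 * node: the "current" and "max" sub-nodes mirror hca_caps_cur[]
	 * and hca_caps_max[] for each capability type, and the PCAM,
	 * MCAM, QCAM and FPGA capability words are exported directly
	 * (typically readable with sysctl -x dev.mlx5_core.<unit>.caps).
	 */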
1385359529Skib	cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1386359529Skib	    SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)),
1387359529Skib	    OID_AUTO, "caps", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1388359529Skib	    "Raw hardware capability bitstrings");
1389359529Skib	if (cap_sysctl_node == NULL) {
1390359529Skib		err = -ENOMEM;
1391359529Skib		goto clean_sysctl_ctx;
1392359529Skib	}
1393359529Skib	current_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1394359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1395359529Skib	    OID_AUTO, "current", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1396359529Skib	    "");
1397359529Skib	if (current_cap_sysctl_node == NULL) {
1398359529Skib		err = -ENOMEM;
1399359529Skib		goto clean_sysctl_ctx;
1400359529Skib	}
1401359529Skib	max_cap_sysctl_node = SYSCTL_ADD_NODE(&dev->sysctl_ctx,
1402359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1403359529Skib	    OID_AUTO, "max", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
1404359529Skib	    "");
1405359529Skib	if (max_cap_sysctl_node == NULL) {
1406359529Skib		err = -ENOMEM;
1407359529Skib		goto clean_sysctl_ctx;
1408359529Skib	}
1409359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1410359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1411359529Skib	    OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1412359529Skib	    &dev->hca_caps_cur[MLX5_CAP_GENERAL],
1413359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1414359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1415359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1416359529Skib	    OID_AUTO, "general", CTLFLAG_RD | CTLFLAG_MPSAFE,
1417359529Skib	    &dev->hca_caps_max[MLX5_CAP_GENERAL],
1418359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1419359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1420359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1421359529Skib	    OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1422359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ETHERNET_OFFLOADS],
1423359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1424359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1425359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1426359529Skib	    OID_AUTO, "ether", CTLFLAG_RD | CTLFLAG_MPSAFE,
1427359529Skib	    &dev->hca_caps_max[MLX5_CAP_ETHERNET_OFFLOADS],
1428359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1429359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1430359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1431359529Skib	    OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1432359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ODP],
1433359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1434359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1435359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1436359529Skib	    OID_AUTO, "odp", CTLFLAG_RD | CTLFLAG_MPSAFE,
1437359529Skib	    &dev->hca_caps_max[MLX5_CAP_ODP],
1438359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1439359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1440359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1441359529Skib	    OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1442359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ATOMIC],
1443359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1444359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1445359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1446359529Skib	    OID_AUTO, "atomic", CTLFLAG_RD | CTLFLAG_MPSAFE,
1447359529Skib	    &dev->hca_caps_max[MLX5_CAP_ATOMIC],
1448359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1449359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1450359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1451359529Skib	    OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1452359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ROCE],
1453359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1454359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1455359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1456359529Skib	    OID_AUTO, "roce", CTLFLAG_RD | CTLFLAG_MPSAFE,
1457359529Skib	    &dev->hca_caps_max[MLX5_CAP_ROCE],
1458359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1459359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1460359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1461359529Skib	    OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1462359529Skib	    &dev->hca_caps_cur[MLX5_CAP_IPOIB_OFFLOADS],
1463359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1464359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1465359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1466359529Skib	    OID_AUTO, "ipoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1467359529Skib	    &dev->hca_caps_max[MLX5_CAP_IPOIB_OFFLOADS],
1468359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1469359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1470359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1471359529Skib	    OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1472359529Skib	    &dev->hca_caps_cur[MLX5_CAP_EOIB_OFFLOADS],
1473359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1474359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1475359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1476359529Skib	    OID_AUTO, "eoib", CTLFLAG_RD | CTLFLAG_MPSAFE,
1477359529Skib	    &dev->hca_caps_max[MLX5_CAP_EOIB_OFFLOADS],
1478359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1479359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1480359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1481359529Skib	    OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1482359529Skib	    &dev->hca_caps_cur[MLX5_CAP_FLOW_TABLE],
1483359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1484359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1485359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1486359529Skib	    OID_AUTO, "flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1487359529Skib	    &dev->hca_caps_max[MLX5_CAP_FLOW_TABLE],
1488359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1489359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1490359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1491359529Skib	    OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1492359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE],
1493359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1494359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1495359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1496359529Skib	    OID_AUTO, "eswitch_flow_table", CTLFLAG_RD | CTLFLAG_MPSAFE,
1497359529Skib	    &dev->hca_caps_max[MLX5_CAP_ESWITCH_FLOW_TABLE],
1498359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1499359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1500359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1501359529Skib	    OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1502359529Skib	    &dev->hca_caps_cur[MLX5_CAP_ESWITCH],
1503359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1504359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1505359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1506359529Skib	    OID_AUTO, "eswitch", CTLFLAG_RD | CTLFLAG_MPSAFE,
1507359529Skib	    &dev->hca_caps_max[MLX5_CAP_ESWITCH],
1508359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1509359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1510359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1511359529Skib	    OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1512359529Skib	    &dev->hca_caps_cur[MLX5_CAP_SNAPSHOT],
1513359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1514359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1515359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1516359529Skib	    OID_AUTO, "snapshot", CTLFLAG_RD | CTLFLAG_MPSAFE,
1517359529Skib	    &dev->hca_caps_max[MLX5_CAP_SNAPSHOT],
1518359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1519359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1520359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1521359529Skib	    OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1522359529Skib	    &dev->hca_caps_cur[MLX5_CAP_VECTOR_CALC],
1523359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1524359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1525359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1526359529Skib	    OID_AUTO, "vector_calc", CTLFLAG_RD | CTLFLAG_MPSAFE,
1527359529Skib	    &dev->hca_caps_max[MLX5_CAP_VECTOR_CALC],
1528359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1529359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1530359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1531359529Skib	    OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1532359529Skib	    &dev->hca_caps_cur[MLX5_CAP_QOS],
1533359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1534359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1535359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1536359529Skib	    OID_AUTO, "qos", CTLFLAG_RD | CTLFLAG_MPSAFE,
1537359529Skib	    &dev->hca_caps_max[MLX5_CAP_QOS],
1538359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1539359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1540359529Skib	    SYSCTL_CHILDREN(current_cap_sysctl_node),
1541359529Skib	    OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1542359529Skib	    &dev->hca_caps_cur[MLX5_CAP_DEBUG],
1543359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1544359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1545359529Skib	    SYSCTL_CHILDREN(max_cap_sysctl_node),
1546359529Skib	    OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_MPSAFE,
1547359529Skib	    &dev->hca_caps_max[MLX5_CAP_DEBUG],
1548359529Skib	    MLX5_UN_SZ_DW(hca_cap_union) * sizeof(u32), "IU", "");
1549359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1550359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1551359529Skib	    OID_AUTO, "pcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1552359529Skib	    &dev->caps.pcam, sizeof(dev->caps.pcam), "IU", "");
1553359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1554359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1555359529Skib	    OID_AUTO, "mcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1556359529Skib	    &dev->caps.mcam, sizeof(dev->caps.mcam), "IU", "");
1557359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1558359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1559359529Skib	    OID_AUTO, "qcam", CTLFLAG_RD | CTLFLAG_MPSAFE,
1560359529Skib	    &dev->caps.qcam, sizeof(dev->caps.qcam), "IU", "");
1561359529Skib	SYSCTL_ADD_OPAQUE(&dev->sysctl_ctx,
1562359529Skib	    SYSCTL_CHILDREN(cap_sysctl_node),
1563359529Skib	    OID_AUTO, "fpga", CTLFLAG_RD | CTLFLAG_MPSAFE,
1564359529Skib	    &dev->caps.fpga, sizeof(dev->caps.fpga), "IU", "");
1565353206Shselasky
1566290650Shselasky	INIT_LIST_HEAD(&priv->ctx_list);
1567290650Shselasky	spin_lock_init(&priv->ctx_lock);
1568341930Shselasky	mutex_init(&dev->pci_status_mutex);
1569341930Shselasky	mutex_init(&dev->intf_state_mutex);
1570347880Shselasky	mtx_init(&dev->dump_lock, "mlx5dmp", NULL, MTX_DEF | MTX_NEW);
1571331580Shselasky	err = mlx5_pci_init(dev, priv);
1572290650Shselasky	if (err) {
1573353224Shselasky		mlx5_core_err(dev, "mlx5_pci_init failed %d\n", err);
1574331580Shselasky		goto clean_dev;
1575290650Shselasky	}
1576290650Shselasky
1577341930Shselasky	err = mlx5_health_init(dev);
1578341930Shselasky	if (err) {
1579353224Shselasky		mlx5_core_err(dev, "mlx5_health_init failed %d\n", err);
1580341930Shselasky		goto close_pci;
1581341930Shselasky	}
1582331580Shselasky
1583331810Shselasky	mlx5_pagealloc_init(dev);
1584331810Shselasky
1585331810Shselasky	err = mlx5_load_one(dev, priv, true);
1586290650Shselasky	if (err) {
1587353224Shselasky		mlx5_core_err(dev, "mlx5_load_one failed %d\n", err);
1588331580Shselasky		goto clean_health;
1589290650Shselasky	}
1590290650Shselasky
1591331914Shselasky	mlx5_fwdump_prep(dev);
1592331914Shselasky
1593347847Shselasky	mlx5_firmware_update(dev);
1594347847Shselasky
1595359540Skib#ifdef PCI_IOV
1596359540Skib	if (MLX5_CAP_GEN(dev, vport_group_manager)) {
1597359545Skib		if (pci_find_extcap(bsddev, PCIZ_SRIOV, &sriov_pos) == 0) {
1598359545Skib			num_vfs = pci_read_config(bsddev, sriov_pos +
1599359545Skib			    PCIR_SRIOV_TOTAL_VFS, 2);
1600359545Skib		} else {
1601359548Skib			mlx5_core_info(dev, "cannot find SR-IOV PCIe cap\n");
1602359545Skib			num_vfs = 0;
1603359545Skib		}
1604359545Skib		err = mlx5_eswitch_init(dev, 1 + num_vfs);
1605359544Skib		if (err == 0) {
1606359544Skib			pf_schema = pci_iov_schema_alloc_node();
1607359544Skib			vf_schema = pci_iov_schema_alloc_node();
1608359544Skib			pci_iov_schema_add_unicast_mac(vf_schema,
1609359544Skib			    iov_mac_addr_name, 0, NULL);
1610365414Skib			pci_iov_schema_add_uint64(vf_schema, iov_node_guid_name,
1611365414Skib			    0, 0);
1612365414Skib			pci_iov_schema_add_uint64(vf_schema, iov_port_guid_name,
1613365414Skib			    0, 0);
1614359544Skib			err = pci_iov_attach(bsddev, pf_schema, vf_schema);
1615359544Skib			if (err != 0) {
1616359544Skib				device_printf(bsddev,
1617359540Skib			    "Failed to initialize SR-IOV support, error %d\n",
1618359544Skib				    err);
1619359544Skib			}
1620359544Skib		} else {
1621359544Skib			mlx5_core_err(dev, "eswitch init failed, error %d\n",
1622359540Skib			    err);
1623359540Skib		}
1624359540Skib	}
1625359540Skib#endif
1626359540Skib
1627341930Shselasky	pci_save_state(bsddev);
1628290650Shselasky	return 0;
1629290650Shselasky
1630331580Shselaskyclean_health:
1631331810Shselasky	mlx5_pagealloc_cleanup(dev);
1632341930Shselasky	mlx5_health_cleanup(dev);
1633331580Shselaskyclose_pci:
1634341930Shselasky	mlx5_pci_close(dev, priv);
1635331580Shselaskyclean_dev:
1636353206Shselasky	mtx_destroy(&dev->dump_lock);
1637353206Shselaskyclean_sysctl_ctx:
1638338554Shselasky	sysctl_ctx_free(&dev->sysctl_ctx);
1639290650Shselasky	kfree(dev);
1640290650Shselasky	return err;
1641290650Shselasky}
1642290650Shselasky
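/*
 * PCI detach routine: detaches SR-IOV (when compiled in), unloads the
 * device and releases the page allocator, health, firmware dump, PCI and
 * sysctl resources acquired in init_one().
 */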
1643290650Shselaskystatic void remove_one(struct pci_dev *pdev)
1644290650Shselasky{
1645290650Shselasky	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1646331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1647290650Shselasky
1648369090Sgit2svn#ifdef PCI_IOV
1649369090Sgit2svn	pci_iov_detach(pdev->dev.bsddev);
1650369090Sgit2svn	mlx5_eswitch_disable_sriov(priv->eswitch);
1651369090Sgit2svn#endif
1652369090Sgit2svn
1653331810Shselasky	if (mlx5_unload_one(dev, priv, true)) {
1654366881Shselasky		mlx5_core_err(dev, "mlx5_unload_one() failed, leaked %lld bytes\n",
1655366881Shselasky		    (long long)(dev->priv.fw_pages * MLX5_ADAPTER_PAGE_SIZE));
1656331580Shselasky	}
1657331580Shselasky
1658331810Shselasky	mlx5_pagealloc_cleanup(dev);
1659331580Shselasky	mlx5_health_cleanup(dev);
1660347880Shselasky	mlx5_fwdump_clean(dev);
1661331580Shselasky	mlx5_pci_close(dev, priv);
1662347880Shselasky	mtx_destroy(&dev->dump_lock);
1663331580Shselasky	pci_set_drvdata(pdev, NULL);
1664338554Shselasky	sysctl_ctx_free(&dev->sysctl_ctx);
1665290650Shselasky	kfree(dev);
1666290650Shselasky}
1667290650Shselasky
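/*
 * PCI error handler: move the device into the internal error state and
 * unload it.  For a non-zero channel state the health work queue is
 * drained and the PCI device disabled; a permanent failure disconnects,
 * anything else requests a slot reset.
 */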
1668331580Shselaskystatic pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
1669331580Shselasky					      pci_channel_state_t state)
1670331580Shselasky{
1671331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1672331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1673331580Shselasky
1674353224Shselasky	mlx5_core_info(dev, "%s was called\n", __func__);
1675331810Shselasky	mlx5_enter_error_state(dev, false);
1676331810Shselasky	mlx5_unload_one(dev, priv, false);
1677331914Shselasky
1678331582Shselasky	if (state) {
1679331582Shselasky		mlx5_drain_health_wq(dev);
1680331582Shselasky		mlx5_pci_disable_device(dev);
1681331582Shselasky	}
1682331582Shselasky
1683331580Shselasky	return state == pci_channel_io_perm_failure ?
1684331580Shselasky		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
1685331580Shselasky}
1686331580Shselasky
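/*
 * Slot reset handler: re-enable the PCI device, restore bus mastering,
 * the power state and the saved PCI configuration, and report whether
 * the device can be recovered.
 */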
1687331580Shselaskystatic pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
1688331580Shselasky{
1689331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1690331580Shselasky	int err = 0;
1691331580Shselasky
1692353224Shselasky	mlx5_core_info(dev, "%s was called\n", __func__);
1693331580Shselasky
1694331580Shselasky	err = mlx5_pci_enable_device(dev);
1695331580Shselasky	if (err) {
1696353224Shselasky		mlx5_core_err(dev,
1697353224Shselasky		    "mlx5_pci_enable_device failed with error code: %d\n", err);
1698331580Shselasky		return PCI_ERS_RESULT_DISCONNECT;
1699331580Shselasky	}
1700331580Shselasky	pci_set_master(pdev);
1701331580Shselasky	pci_set_powerstate(pdev->dev.bsddev, PCI_POWERSTATE_D0);
1702331580Shselasky	pci_restore_state(pdev->dev.bsddev);
1703331816Shselasky	pci_save_state(pdev->dev.bsddev);
1704331580Shselasky
1705331580Shselasky	return err ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
1706331580Shselasky}
1707331580Shselasky
1708331580Shselasky/* Wait for the device to show vital signs. For now we check
1709331580Shselasky * that we can read the device ID and that the health buffer
1710331580Shselasky * shows a non-zero value which is different from 0xffffffff.
1711331580Shselasky */
1712331580Shselaskystatic void wait_vital(struct pci_dev *pdev)
1713331580Shselasky{
1714331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1715331580Shselasky	struct mlx5_core_health *health = &dev->priv.health;
1716331580Shselasky	const int niter = 100;
1717331580Shselasky	u32 count;
1718331580Shselasky	u16 did;
1719331580Shselasky	int i;
1720331580Shselasky
1721331580Shselasky	/* Wait for firmware to be ready after reset */
1722331580Shselasky	msleep(1000);
1723331580Shselasky	for (i = 0; i < niter; i++) {
1724331580Shselasky		if (pci_read_config_word(pdev, 2, &did)) {
1725353224Shselasky			mlx5_core_warn(dev, "failed reading config word\n");
1726331580Shselasky			break;
1727331580Shselasky		}
1728331580Shselasky		if (did == pdev->device) {
1729353224Shselasky			mlx5_core_info(dev,
1730353224Shselasky			    "device ID correctly read after %d iterations\n", i);
1731331580Shselasky			break;
1732331580Shselasky		}
1733331580Shselasky		msleep(50);
1734331580Shselasky	}
1735331580Shselasky	if (i == niter)
1736353224Shselasky		mlx5_core_warn(dev, "could not read device ID\n");
1737331580Shselasky
1738331580Shselasky	for (i = 0; i < niter; i++) {
1739331580Shselasky		count = ioread32be(health->health_counter);
1740331580Shselasky		if (count && count != 0xffffffff) {
1741353224Shselasky			mlx5_core_info(dev,
1742353224Shselasky			    "Counter value 0x%x after %d iterations\n", count, i);
1743331580Shselasky			break;
1744331580Shselasky		}
1745331580Shselasky		msleep(50);
1746331580Shselasky	}
1747331580Shselasky
1748331580Shselasky	if (i == niter)
1749353224Shselasky		mlx5_core_warn(dev, "could not read the health counter\n");
1750331580Shselasky}
1751331580Shselasky
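/*
 * Resume handler: wait for the device to show vital signs after the
 * reset and then reload it.
 */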
1752331580Shselaskystatic void mlx5_pci_resume(struct pci_dev *pdev)
1753331580Shselasky{
1754331580Shselasky	struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
1755331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1756331580Shselasky	int err;
1757331580Shselasky
1758353224Shselasky	mlx5_core_info(dev, "%s was called\n", __func__);
1759331580Shselasky
1760331580Shselasky	wait_vital(pdev);
1761331580Shselasky
1762331810Shselasky	err = mlx5_load_one(dev, priv, false);
1763331580Shselasky	if (err)
1764353224Shselasky		mlx5_core_err(dev,
1765353224Shselasky		    "mlx5_load_one failed with error code: %d\n", err);
1766331580Shselasky	else
1767353224Shselasky		mlx5_core_info(dev, "device recovered\n");
1768331580Shselasky}
1769331580Shselasky
1770331580Shselaskystatic const struct pci_error_handlers mlx5_err_handler = {
1771331580Shselasky	.error_detected = mlx5_pci_err_detected,
1772331580Shselasky	.slot_reset	= mlx5_pci_slot_reset,
1773331580Shselasky	.resume		= mlx5_pci_resume
1774331580Shselasky};
1775331580Shselasky
1776359540Skib#ifdef PCI_IOV
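/*
 * SR-IOV callbacks invoked by the FreeBSD pci_iov(4) framework.
 * mlx5_iov_init() enables SR-IOV on the eswitch for the requested number
 * of VFs, clamped to the number of available vports.
 */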
1777359540Skibstatic int
1778359540Skibmlx5_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *pf_config)
1779359540Skib{
1780359540Skib	struct pci_dev *pdev;
1781359540Skib	struct mlx5_core_dev *core_dev;
1782359540Skib	struct mlx5_priv *priv;
1783359544Skib	int err;
1784359540Skib
1785359540Skib	pdev = device_get_softc(dev);
1786359540Skib	core_dev = pci_get_drvdata(pdev);
1787359540Skib	priv = &core_dev->priv;
1788359540Skib
1789359545Skib	if (priv->eswitch == NULL)
1790359545Skib		return (ENXIO);
1791359545Skib	if (priv->eswitch->total_vports < num_vfs + 1)
1792359545Skib		num_vfs = priv->eswitch->total_vports - 1;
1793359544Skib	err = mlx5_eswitch_enable_sriov(priv->eswitch, num_vfs);
1794359544Skib	return (-err);
1795359540Skib}
1796359540Skib
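/* Disable SR-IOV on the eswitch when the VF configuration is torn down. */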
1797359540Skibstatic void
1798359540Skibmlx5_iov_uninit(device_t dev)
1799359540Skib{
1800359540Skib	struct pci_dev *pdev;
1801359540Skib	struct mlx5_core_dev *core_dev;
1802359540Skib	struct mlx5_priv *priv;
1803359540Skib
1804359540Skib	pdev = device_get_softc(dev);
1805359540Skib	core_dev = pci_get_drvdata(pdev);
1806359540Skib	priv = &core_dev->priv;
1807359544Skib
1808359544Skib	mlx5_eswitch_disable_sriov(priv->eswitch);
1809359540Skib}
1810359540Skib
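/*
 * Per-VF configuration callback: apply the MAC address, node GUID and
 * port GUID supplied in the VF nvlist (if any), set the vport to follow
 * the PF link state and enable the VF HCA.
 */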
1811359540Skibstatic int
1812359540Skibmlx5_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *vf_config)
1813359540Skib{
1814359540Skib	struct pci_dev *pdev;
1815359540Skib	struct mlx5_core_dev *core_dev;
1816359540Skib	struct mlx5_priv *priv;
1817359544Skib	const void *mac;
1818359544Skib	size_t mac_size;
1819365414Skib	uint64_t node_guid, port_guid;
1820359540Skib	int error;
1821359540Skib
1822359540Skib	pdev = device_get_softc(dev);
1823359540Skib	core_dev = pci_get_drvdata(pdev);
1824359540Skib	priv = &core_dev->priv;
1825359540Skib
1826359545Skib	if (vfnum + 1 >= priv->eswitch->total_vports)
1827359545Skib		return (ENXIO);
1828359545Skib
1829359544Skib	if (nvlist_exists_binary(vf_config, iov_mac_addr_name)) {
1830359544Skib		mac = nvlist_get_binary(vf_config, iov_mac_addr_name,
1831359544Skib		    &mac_size);
1832359544Skib		error = -mlx5_eswitch_set_vport_mac(priv->eswitch,
1833359544Skib		    vfnum + 1, __DECONST(u8 *, mac));
1834365413Skib		if (error != 0) {
1835365413Skib			mlx5_core_err(core_dev,
1836365413Skib			    "setting MAC for VF %d failed, error %d\n",
1837365413Skib			    vfnum + 1, error);
1838365413Skib		}
1839359544Skib	}
1840359544Skib
1841365414Skib	if (nvlist_exists_number(vf_config, iov_node_guid_name)) {
1842365414Skib		node_guid = nvlist_get_number(vf_config, iov_node_guid_name);
1843365414Skib		error = -mlx5_modify_nic_vport_node_guid(core_dev, vfnum + 1,
1844365414Skib		    node_guid);
1845365414Skib		if (error != 0) {
1846365414Skib			mlx5_core_err(core_dev,
1847365414Skib		    "modifying node GUID for VF %d failed, error %d\n",
1848365414Skib			    vfnum + 1, error);
1849365414Skib		}
1850365414Skib	}
1851365414Skib
1852365414Skib	if (nvlist_exists_number(vf_config, iov_port_guid_name)) {
1853365414Skib		port_guid = nvlist_get_number(vf_config, iov_port_guid_name);
1854365414Skib		error = -mlx5_modify_nic_vport_port_guid(core_dev, vfnum + 1,
1855365414Skib		    port_guid);
1856365414Skib		if (error != 0) {
1857365414Skib			mlx5_core_err(core_dev,
1858365414Skib		    "modifying port GUID for VF %d failed, error %d\n",
1859365414Skib			    vfnum + 1, error);
1860365414Skib		}
1861365414Skib	}
1862365414Skib
1863359544Skib	error = -mlx5_eswitch_set_vport_state(priv->eswitch, vfnum + 1,
1864359544Skib	    VPORT_STATE_FOLLOW);
1865359544Skib	if (error != 0) {
1866359544Skib		mlx5_core_err(core_dev,
1867359544Skib		    "bringing up vport for VF %d failed, error %d\n",
1868359544Skib		    vfnum + 1, error);
1869359544Skib	}
1870359540Skib	error = -mlx5_core_enable_hca(core_dev, vfnum + 1);
1871359540Skib	if (error != 0) {
1872359540Skib		mlx5_core_err(core_dev, "enabling VF %d failed, error %d\n",
1873359544Skib		    vfnum + 1, error);
1874359540Skib	}
1875359540Skib	return (error);
1876359540Skib}
1877359540Skib#endif
1878359540Skib
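/*
 * Try to shut the firmware down with a single fast or force teardown
 * command instead of the full unload sequence.  Returns 0 on success,
 * -EOPNOTSUPP when fast unload is disabled or unsupported, -EAGAIN when
 * the device is already in an internal error state, or the teardown
 * command error otherwise.
 */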
1879331810Shselaskystatic int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
1880331810Shselasky{
1881347818Shselasky	bool fast_teardown, force_teardown;
1882331810Shselasky	int err;
1883331810Shselasky
1884347819Shselasky	if (!mlx5_fast_unload_enabled) {
1885347819Shselasky		mlx5_core_dbg(dev, "fast unload is disabled by user\n");
1886347819Shselasky		return -EOPNOTSUPP;
1887347819Shselasky	}
1888347819Shselasky
1889347818Shselasky	fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
1890347818Shselasky	force_teardown = MLX5_CAP_GEN(dev, force_teardown);
1891347818Shselasky
1892347818Shselasky	mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
1893347818Shselasky	mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
1894347818Shselasky
1895347818Shselasky	if (!fast_teardown && !force_teardown)
1896331810Shselasky		return -EOPNOTSUPP;
1897331810Shselasky
1898331810Shselasky	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
1899331810Shselasky		mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
1900331810Shselasky		return -EAGAIN;
1901331810Shselasky	}
1902331810Shselasky
1903341934Shselasky	/* The panic teardown firmware command will stop the PCI bus
1904341934Shselasky	 * communication with the HCA, so the health poll is no longer needed.
1905341934Shselasky	 */
1906341934Shselasky	mlx5_drain_health_wq(dev);
1907341934Shselasky	mlx5_stop_health_poll(dev, false);
1908341934Shselasky
1909347818Shselasky	err = mlx5_cmd_fast_teardown_hca(dev);
1910347818Shselasky	if (!err)
1911347818Shselasky		goto done;
1912347818Shselasky
1913331810Shselasky	err = mlx5_cmd_force_teardown_hca(dev);
1914347818Shselasky	if (!err)
1915347818Shselasky		goto done;
1916331810Shselasky
1917347818Shselasky	mlx5_core_dbg(dev, "Firmware couldn't do fast unload, error: %d\n", err);
1918347818Shselasky	mlx5_start_health_poll(dev);
1919347818Shselasky	return err;
1920347818Shselaskydone:
1921331810Shselasky	mlx5_enter_error_state(dev, true);
1922331810Shselasky	return 0;
1923331810Shselasky}
1924331810Shselasky
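/*
 * Mark interrupts as disabled and wait for all completion and control
 * event queue interrupt handlers to finish.
 */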
1925358015Shselaskystatic void mlx5_shutdown_disable_interrupts(struct mlx5_core_dev *mdev)
1926347802Shselasky{
1927347802Shselasky	int nvec = mdev->priv.eq_table.num_comp_vectors + MLX5_EQ_VEC_COMP_BASE;
1928347802Shselasky	int x;
1929347802Shselasky
1930347802Shselasky	mdev->priv.disable_irqs = 1;
1931347802Shselasky
1932347802Shselasky	/* wait for all IRQ handlers to finish processing */
1933347802Shselasky	for (x = 0; x != nvec; x++)
1934347802Shselasky		synchronize_irq(mdev->priv.msix_arr[x].vector);
1935347802Shselasky}
1936347802Shselasky
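/*
 * Shutdown hook: switch the command interface to polling mode, quiesce
 * interrupts and try a fast firmware unload, falling back to a regular
 * unload before disabling the PCI device.
 */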
1937329211Shselaskystatic void shutdown_one(struct pci_dev *pdev)
1938329211Shselasky{
1939331580Shselasky	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
1940331580Shselasky	struct mlx5_priv *priv = &dev->priv;
1941331810Shselasky	int err;
1942331580Shselasky
1943347802Shselasky	/* enter polling mode */
1944347802Shselasky	mlx5_cmd_use_polling(dev);
1945347802Shselasky
1946355653Skib	set_bit(MLX5_INTERFACE_STATE_TEARDOWN, &dev->intf_state);
1947355653Skib
1948347802Shselasky	/* disable all interrupts */
1949358015Shselasky	mlx5_shutdown_disable_interrupts(dev);
1950347802Shselasky
1951331810Shselasky	err = mlx5_try_fast_unload(dev);
1952331810Shselasky	if (err)
1953331810Shselasky		mlx5_unload_one(dev, priv, false);
1954331580Shselasky	mlx5_pci_disable_device(dev);
1955329211Shselasky}
1956329211Shselasky
1957290650Shselaskystatic const struct pci_device_id mlx5_core_pci_table[] = {
1958290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4113) }, /* Connect-IB */
1959290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4114) }, /* Connect-IB VF */
1960290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4115) }, /* ConnectX-4 */
1961290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4116) }, /* ConnectX-4 VF */
1962290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4117) }, /* ConnectX-4LX */
1963290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4118) }, /* ConnectX-4LX VF */
1964306233Shselasky	{ PCI_VDEVICE(MELLANOX, 4119) }, /* ConnectX-5 */
1965306233Shselasky	{ PCI_VDEVICE(MELLANOX, 4120) }, /* ConnectX-5 VF */
1966290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4121) },
1967290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4122) },
1968290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4123) },
1969290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4124) },
1970290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4125) },
1971290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4126) },
1972290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4127) },
1973290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4128) },
1974290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4129) },
1975290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4130) },
1976290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4131) },
1977290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4132) },
1978290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4133) },
1979290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4134) },
1980290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4135) },
1981290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4136) },
1982290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4137) },
1983290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4138) },
1984290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4139) },
1985290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4140) },
1986290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4141) },
1987290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4142) },
1988290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4143) },
1989290650Shselasky	{ PCI_VDEVICE(MELLANOX, 4144) },
1990290650Shselasky	{ 0, }
1991290650Shselasky};
1992290650Shselasky
1993290650ShselaskyMODULE_DEVICE_TABLE(pci, mlx5_core_pci_table);
1994290650Shselasky
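/*
 * mlx5_disable_device() runs the PCI error-detected path;
 * mlx5_recover_device() disables the device, then performs a slot reset
 * followed by a resume.
 */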
1995331809Shselaskyvoid mlx5_disable_device(struct mlx5_core_dev *dev)
1996331809Shselasky{
1997331809Shselasky	mlx5_pci_err_detected(dev->pdev, 0);
1998331809Shselasky}
1999331809Shselasky
2000331809Shselaskyvoid mlx5_recover_device(struct mlx5_core_dev *dev)
2001331809Shselasky{
2002331809Shselasky	mlx5_pci_disable_device(dev);
2003331809Shselasky	if (mlx5_pci_slot_reset(dev->pdev) == PCI_ERS_RESULT_RECOVERED)
2004331809Shselasky		mlx5_pci_resume(dev->pdev);
2005331809Shselasky}
2006331809Shselasky
2007331586Shselaskystruct pci_driver mlx5_core_driver = {
2008290650Shselasky	.name           = DRIVER_NAME,
2009290650Shselasky	.id_table       = mlx5_core_pci_table,
2010329211Shselasky	.shutdown	= shutdown_one,
2011290650Shselasky	.probe          = init_one,
2012331580Shselasky	.remove         = remove_one,
2013359540Skib	.err_handler	= &mlx5_err_handler,
2014359540Skib#ifdef PCI_IOV
2015359540Skib	.bsd_iov_init	= mlx5_iov_init,
2016359540Skib	.bsd_iov_uninit	= mlx5_iov_uninit,
2017359540Skib	.bsd_iov_add_vf	= mlx5_iov_add_vf,
2018359540Skib#endif
2019290650Shselasky};
2020290650Shselasky
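/*
 * Module load handler: registers the PCI driver and runs mlx5_ctl_init(),
 * unwinding the driver registration on failure.
 */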
2021290650Shselaskystatic int __init init(void)
2022290650Shselasky{
2023290650Shselasky	int err;
2024290650Shselasky
2025290650Shselasky	err = pci_register_driver(&mlx5_core_driver);
2026290650Shselasky	if (err)
2027331580Shselasky		goto err_debug;
2028290650Shselasky
2029347871Shselasky	err = mlx5_ctl_init();
2030331586Shselasky	if (err)
2031347871Shselasky		goto err_ctl;
2032331586Shselasky
2033331586Shselasky	return 0;
2034331586Shselasky
2035347871Shselaskyerr_ctl:
2036331586Shselasky	pci_unregister_driver(&mlx5_core_driver);
2037290650Shselasky
2038290650Shselaskyerr_debug:
2039290650Shselasky	return err;
2040290650Shselasky}
2041290650Shselasky
2042290650Shselaskystatic void __exit cleanup(void)
2043290650Shselasky{
2044347871Shselasky	mlx5_ctl_fini();
2045290650Shselasky	pci_unregister_driver(&mlx5_core_driver);
2046290650Shselasky}
2047290650Shselasky
2048363151Shselaskymodule_init_order(init, SI_ORDER_FIRST);
2049363151Shselaskymodule_exit_order(cleanup, SI_ORDER_FIRST);
2050