mlx5_ib_main.c revision 322810
1322810Shselasky/*-
2322810Shselasky * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3322810Shselasky *
4322810Shselasky * Redistribution and use in source and binary forms, with or without
5322810Shselasky * modification, are permitted provided that the following conditions
6322810Shselasky * are met:
7322810Shselasky * 1. Redistributions of source code must retain the above copyright
8322810Shselasky *    notice, this list of conditions and the following disclaimer.
9322810Shselasky * 2. Redistributions in binary form must reproduce the above copyright
10322810Shselasky *    notice, this list of conditions and the following disclaimer in the
11322810Shselasky *    documentation and/or other materials provided with the distribution.
12322810Shselasky *
13322810Shselasky * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14322810Shselasky * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15322810Shselasky * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16322810Shselasky * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17322810Shselasky * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18322810Shselasky * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19322810Shselasky * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20322810Shselasky * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21322810Shselasky * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22322810Shselasky * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23322810Shselasky * SUCH DAMAGE.
24322810Shselasky *
25322810Shselasky * $FreeBSD: head/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c 322810 2017-08-23 12:09:37Z hselasky $
26322810Shselasky */
27322810Shselasky
28322810Shselasky#include <linux/errno.h>
29322810Shselasky#include <linux/pci.h>
30322810Shselasky#include <linux/dma-mapping.h>
31322810Shselasky#include <linux/slab.h>
32322810Shselasky#include <linux/io-mapping.h>
33322810Shselasky#include <linux/sched.h>
34322810Shselasky#include <linux/netdevice.h>
35322810Shselasky#include <linux/etherdevice.h>
36322810Shselasky#include <linux/list.h>
37322810Shselasky#include <dev/mlx5/driver.h>
38322810Shselasky#include <dev/mlx5/vport.h>
39322810Shselasky#include <asm/pgtable.h>
40322810Shselasky#include <linux/fs.h>
41322810Shselasky#undef inode
42322810Shselasky
43322810Shselasky#include <rdma/ib_user_verbs.h>
44322810Shselasky#include <rdma/ib_smi.h>
45322810Shselasky#include <rdma/ib_umem.h>
46322810Shselasky#include "user.h"
47322810Shselasky#include "mlx5_ib.h"
48322810Shselasky
49322810Shselasky#include <sys/unistd.h>
50322810Shselasky#include <sys/kthread.h>
51322810Shselasky
52322810Shselasky#define DRIVER_NAME "mlx5_ib"
53322810Shselasky#define DRIVER_VERSION "3.2-rc1"
54322810Shselasky#define DRIVER_RELDATE	"May 2016"
55322810Shselasky
56322810ShselaskyMODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
57322810ShselaskyMODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
58322810ShselaskyMODULE_LICENSE("Dual BSD/GPL");
59322810ShselaskyMODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
60322810ShselaskyMODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
61322810ShselaskyMODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
62322810ShselaskyMODULE_VERSION(mlx5ib, 1);
/*
 * Kept only for compatibility with old loader.conf settings: profile
 * selection was moved to the mlx5_core module.  The value read here is
 * not used by this driver.
 */
static int deprecated_prof_sel = 2;
module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");

enum {
	MLX5_STANDARD_ATOMIC_SIZE = 0x8,	/* 8-byte atomic operand size */
};

/* Driver-global workqueue shared by all mlx5_ib device instances. */
struct workqueue_struct *mlx5_ib_wq;

/* Version banner printed when a device is attached. */
static char mlx5_version[] =
	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
77322810Shselasky
78322810Shselaskystatic void get_atomic_caps(struct mlx5_ib_dev *dev,
79322810Shselasky			    struct ib_device_attr *props)
80322810Shselasky{
81322810Shselasky	int tmp;
82322810Shselasky	u8 atomic_operations;
83322810Shselasky	u8 atomic_size_qp;
84322810Shselasky	u8 atomic_req_endianess;
85322810Shselasky
86322810Shselasky	atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
87322810Shselasky	atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
88322810Shselasky	atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
89322810Shselasky					       atomic_req_8B_endianess_mode) ||
90322810Shselasky			       !mlx5_host_is_le();
91322810Shselasky
92322810Shselasky	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
93322810Shselasky	if (((atomic_operations & tmp) == tmp)
94322810Shselasky	    && (atomic_size_qp & 8)) {
95322810Shselasky		if (atomic_req_endianess) {
96322810Shselasky			props->atomic_cap = IB_ATOMIC_HCA;
97322810Shselasky		} else {
98322810Shselasky			props->atomic_cap = IB_ATOMIC_NONE;
99322810Shselasky		}
100322810Shselasky	} else {
101322810Shselasky		props->atomic_cap = IB_ATOMIC_NONE;
102322810Shselasky	}
103322810Shselasky
104322810Shselasky	tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
105322810Shselasky	if (((atomic_operations & tmp) == tmp)
106322810Shselasky	    &&(atomic_size_qp & 8)) {
107322810Shselasky		if (atomic_req_endianess)
108322810Shselasky			props->masked_atomic_cap = IB_ATOMIC_HCA;
109322810Shselasky		else {
110322810Shselasky			props->masked_atomic_cap = IB_ATOMIC_NONE;
111322810Shselasky		}
112322810Shselasky	} else {
113322810Shselasky		props->masked_atomic_cap = IB_ATOMIC_NONE;
114322810Shselasky	}
115322810Shselasky}
116322810Shselasky
117322810Shselaskystatic enum rdma_link_layer
118322810Shselaskymlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
119322810Shselasky{
120322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(device);
121322810Shselasky
122322810Shselasky	switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
123322810Shselasky	case MLX5_CAP_PORT_TYPE_IB:
124322810Shselasky		return IB_LINK_LAYER_INFINIBAND;
125322810Shselasky	case MLX5_CAP_PORT_TYPE_ETH:
126322810Shselasky		return IB_LINK_LAYER_ETHERNET;
127322810Shselasky	default:
128322810Shselasky		return IB_LINK_LAYER_UNSPECIFIED;
129322810Shselasky	}
130322810Shselasky}
131322810Shselasky
132322810Shselaskystatic int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
133322810Shselasky{
134322810Shselasky	return !dev->mdev->issi;
135322810Shselasky}
136322810Shselasky
/* How vport attributes are queried from the device. */
enum {
	MLX5_VPORT_ACCESS_METHOD_MAD,	/* legacy MAD interface (ISSI 0) */
	MLX5_VPORT_ACCESS_METHOD_HCA,	/* HCA vport commands (IB link layer) */
	MLX5_VPORT_ACCESS_METHOD_NIC,	/* NIC vport commands (Ethernet/RoCE) */
};
142322810Shselasky
143322810Shselaskystatic int mlx5_get_vport_access_method(struct ib_device *ibdev)
144322810Shselasky{
145322810Shselasky	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
146322810Shselasky		return MLX5_VPORT_ACCESS_METHOD_MAD;
147322810Shselasky
148322810Shselasky	if (mlx5_ib_port_link_layer(ibdev, 1) ==
149322810Shselasky	    IB_LINK_LAYER_ETHERNET)
150322810Shselasky		return MLX5_VPORT_ACCESS_METHOD_NIC;
151322810Shselasky
152322810Shselasky	return MLX5_VPORT_ACCESS_METHOD_HCA;
153322810Shselasky}
154322810Shselasky
155322810Shselaskystatic int mlx5_query_system_image_guid(struct ib_device *ibdev,
156322810Shselasky					__be64 *sys_image_guid)
157322810Shselasky{
158322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
159322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
160322810Shselasky	u64 tmp;
161322810Shselasky	int err;
162322810Shselasky
163322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
164322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
165322810Shselasky		return mlx5_query_system_image_guid_mad_ifc(ibdev,
166322810Shselasky							    sys_image_guid);
167322810Shselasky
168322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
169322810Shselasky		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
170322810Shselasky		if (!err)
171322810Shselasky			*sys_image_guid = cpu_to_be64(tmp);
172322810Shselasky		return err;
173322810Shselasky
174322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
175322810Shselasky		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
176322810Shselasky		if (!err)
177322810Shselasky			*sys_image_guid = cpu_to_be64(tmp);
178322810Shselasky		return err;
179322810Shselasky
180322810Shselasky	default:
181322810Shselasky		return -EINVAL;
182322810Shselasky	}
183322810Shselasky}
184322810Shselasky
185322810Shselaskystatic int mlx5_query_max_pkeys(struct ib_device *ibdev,
186322810Shselasky				u16 *max_pkeys)
187322810Shselasky{
188322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
189322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
190322810Shselasky
191322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
192322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
193322810Shselasky		return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
194322810Shselasky
195322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
196322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
197322810Shselasky		*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
198322810Shselasky						pkey_table_size));
199322810Shselasky		return 0;
200322810Shselasky
201322810Shselasky	default:
202322810Shselasky		return -EINVAL;
203322810Shselasky	}
204322810Shselasky}
205322810Shselasky
206322810Shselaskystatic int mlx5_query_vendor_id(struct ib_device *ibdev,
207322810Shselasky				u32 *vendor_id)
208322810Shselasky{
209322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
210322810Shselasky
211322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
212322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
213322810Shselasky		return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
214322810Shselasky
215322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
216322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
217322810Shselasky		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
218322810Shselasky
219322810Shselasky	default:
220322810Shselasky		return -EINVAL;
221322810Shselasky	}
222322810Shselasky}
223322810Shselasky
224322810Shselaskystatic int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
225322810Shselasky				__be64 *node_guid)
226322810Shselasky{
227322810Shselasky	u64 tmp;
228322810Shselasky	int err;
229322810Shselasky
230322810Shselasky	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
231322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
232322810Shselasky		return mlx5_query_node_guid_mad_ifc(dev, node_guid);
233322810Shselasky
234322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
235322810Shselasky		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
236322810Shselasky		if (!err)
237322810Shselasky			*node_guid = cpu_to_be64(tmp);
238322810Shselasky		return err;
239322810Shselasky
240322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
241322810Shselasky		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
242322810Shselasky		if (!err)
243322810Shselasky			*node_guid = cpu_to_be64(tmp);
244322810Shselasky		return err;
245322810Shselasky
246322810Shselasky	default:
247322810Shselasky		return -EINVAL;
248322810Shselasky	}
249322810Shselasky}
250322810Shselasky
/* In/out layout of the NODE_DESC access register (64-byte node description). */
struct mlx5_reg_node_desc {
	u8	desc[64];
};
254322810Shselasky
255322810Shselaskystatic int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
256322810Shselasky{
257322810Shselasky	struct mlx5_reg_node_desc in;
258322810Shselasky
259322810Shselasky	if (mlx5_use_mad_ifc(dev))
260322810Shselasky		return mlx5_query_node_desc_mad_ifc(dev, node_desc);
261322810Shselasky
262322810Shselasky	memset(&in, 0, sizeof(in));
263322810Shselasky
264322810Shselasky	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
265322810Shselasky				    sizeof(struct mlx5_reg_node_desc),
266322810Shselasky				    MLX5_REG_NODE_DESC, 0, 0);
267322810Shselasky}
268322810Shselasky
/*
 * ib_device query_device callback: fill in the global device
 * attributes (ib_device_attr) from firmware capability registers.
 * Returns 0 on success or a negative errno from one of the firmware
 * queries.
 */
static int mlx5_ib_query_device(struct ib_device *ibdev,
				struct ib_device_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	int max_sq_desc;
	int max_rq_sg;
	int max_sq_sg;
	int err;


	memset(props, 0, sizeof(*props));

	err = mlx5_query_system_image_guid(ibdev,
					   &props->sys_image_guid);
	if (err)
		return err;

	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
	if (err)
		return err;

	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
	if (err)
		return err;

	/* firmware version packed as major.minor.sub into one 64-bit value */
	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
		((u64)fw_rev_min(dev->mdev) << 16) |
		fw_rev_sub(dev->mdev);
	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
		IB_DEVICE_PORT_ACTIVE_EVENT		|
		IB_DEVICE_SYS_IMAGE_GUID		|
		IB_DEVICE_RC_RNR_NAK_GEN;

	/* optional capability flags, gated on firmware capability bits */
	if (MLX5_CAP_GEN(mdev, pkv))
		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, qkv))
		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
	if (MLX5_CAP_GEN(mdev, apm))
		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
	if (MLX5_CAP_GEN(mdev, xrc))
		props->device_cap_flags |= IB_DEVICE_XRC;
	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
	if (MLX5_CAP_GEN(mdev, block_lb_mc))
		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;

	props->vendor_part_id	   = mdev->pdev->device;
	props->hw_ver		   = mdev->pdev->revision;

	props->max_mr_size	   = ~0ull;
	props->page_size_cap	   = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
	props->max_qp		   = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
	props->max_qp_wr	   = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
	/* RQ SGEs: how many data segments fit in the maximum RQ WQE */
	max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
		     sizeof(struct mlx5_wqe_data_seg);
	/* SQ SGEs: data segments that fit after the ctrl+raddr segments,
	 * with the SQ WQE size capped at 512 bytes */
	max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
	max_sq_sg = (max_sq_desc -
		     sizeof(struct mlx5_wqe_ctrl_seg) -
		     sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
	/* advertise the smaller of the two so it holds for both queues */
	props->max_sge = min(max_rq_sg, max_sq_sg);
	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
	props->max_mr		   = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
	props->max_pd		   = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
	props->max_qp_rd_atom	   = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
	props->max_srq		   = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
	props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
	props->max_srq_sge	   = max_rq_sg - 1;
	props->max_fast_reg_page_list_len = (unsigned int)-1;
	get_atomic_caps(dev, props);
	props->max_mcast_grp	   = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
					   props->max_mcast_grp;
	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
	props->max_ah		= INT_MAX;

	return 0;
}
352322810Shselasky
/* Hardware active-width bitmask, as reported in the PTYS register. */
enum mlx5_ib_width {
	MLX5_IB_WIDTH_1X	= 1 << 0,
	MLX5_IB_WIDTH_2X	= 1 << 1,
	MLX5_IB_WIDTH_4X	= 1 << 2,
	MLX5_IB_WIDTH_8X	= 1 << 3,
	MLX5_IB_WIDTH_12X	= 1 << 4
};
360322810Shselasky
361322810Shselaskystatic int translate_active_width(struct ib_device *ibdev, u8 active_width,
362322810Shselasky				  u8 *ib_width)
363322810Shselasky{
364322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
365322810Shselasky	int err = 0;
366322810Shselasky
367322810Shselasky	if (active_width & MLX5_IB_WIDTH_1X) {
368322810Shselasky		*ib_width = IB_WIDTH_1X;
369322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_2X) {
370322810Shselasky		mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
371322810Shselasky			     (int)active_width);
372322810Shselasky		err = -EINVAL;
373322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_4X) {
374322810Shselasky		*ib_width = IB_WIDTH_4X;
375322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_8X) {
376322810Shselasky		*ib_width = IB_WIDTH_8X;
377322810Shselasky	} else if (active_width & MLX5_IB_WIDTH_12X) {
378322810Shselasky		*ib_width = IB_WIDTH_12X;
379322810Shselasky	} else {
380322810Shselasky		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
381322810Shselasky			    (int)active_width);
382322810Shselasky		err = -EINVAL;
383322810Shselasky	}
384322810Shselasky
385322810Shselasky	return err;
386322810Shselasky}
387322810Shselasky
/*
 * TODO: Move to IB core
 */
/* IB PortInfo MaxVLNum encodings (IB spec values). */
enum ib_max_vl_num {
	__IB_MAX_VL_0		= 1,
	__IB_MAX_VL_0_1		= 2,
	__IB_MAX_VL_0_3		= 3,
	__IB_MAX_VL_0_7		= 4,
	__IB_MAX_VL_0_14	= 5,
};

/* Hardware vl_hw_cap encodings from the PVLC register. */
enum mlx5_vl_hw_cap {
	MLX5_VL_HW_0	= 1,
	MLX5_VL_HW_0_1	= 2,
	MLX5_VL_HW_0_2	= 3,
	MLX5_VL_HW_0_3	= 4,
	MLX5_VL_HW_0_4	= 5,
	MLX5_VL_HW_0_5	= 6,
	MLX5_VL_HW_0_6	= 7,
	MLX5_VL_HW_0_7	= 8,
	MLX5_VL_HW_0_14	= 15
};
410322810Shselasky
411322810Shselaskystatic int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
412322810Shselasky				u8 *max_vl_num)
413322810Shselasky{
414322810Shselasky	switch (vl_hw_cap) {
415322810Shselasky	case MLX5_VL_HW_0:
416322810Shselasky		*max_vl_num = __IB_MAX_VL_0;
417322810Shselasky		break;
418322810Shselasky	case MLX5_VL_HW_0_1:
419322810Shselasky		*max_vl_num = __IB_MAX_VL_0_1;
420322810Shselasky		break;
421322810Shselasky	case MLX5_VL_HW_0_3:
422322810Shselasky		*max_vl_num = __IB_MAX_VL_0_3;
423322810Shselasky		break;
424322810Shselasky	case MLX5_VL_HW_0_7:
425322810Shselasky		*max_vl_num = __IB_MAX_VL_0_7;
426322810Shselasky		break;
427322810Shselasky	case MLX5_VL_HW_0_14:
428322810Shselasky		*max_vl_num = __IB_MAX_VL_0_14;
429322810Shselasky		break;
430322810Shselasky
431322810Shselasky	default:
432322810Shselasky		return -EINVAL;
433322810Shselasky	}
434322810Shselasky
435322810Shselasky	return 0;
436322810Shselasky}
437322810Shselasky
/*
 * Query IB port attributes from firmware: the HCA vport context
 * provides LID/SM/state information, the PTYS register the link width
 * and speed, the PMTU register the MTUs, and the PVLC register the VL
 * capability.  Returns 0 on success or a negative errno; on error
 * *props may be partially filled.
 */
static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
			      struct ib_port_attr *props)
{
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_core_dev *mdev = dev->mdev;
	u32 *rep;
	int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
	struct mlx5_ptys_reg *ptys;
	struct mlx5_pmtu_reg *pmtu;
	struct mlx5_pvlc_reg pvlc;
	void *ctx;
	int err;

	/* allocate the three output buffers; the cleanup below handles NULL */
	rep = mlx5_vzalloc(outlen);
	ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
	pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
	if (!rep || !ptys || !pmtu) {
		err = -ENOMEM;
		goto out;
	}

	memset(props, 0, sizeof(*props));

	/* what if I am pf with dual port */
	err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
	if (err)
		goto out;

	ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);

	props->lid		= MLX5_GET(hca_vport_context, ctx, lid);
	props->lmc		= MLX5_GET(hca_vport_context, ctx, lmc);
	props->sm_lid		= MLX5_GET(hca_vport_context, ctx, sm_lid);
	props->sm_sl		= MLX5_GET(hca_vport_context, ctx, sm_sl);
	props->state		= MLX5_GET(hca_vport_context, ctx, vport_state);
	props->phys_state	= MLX5_GET(hca_vport_context, ctx,
					port_physical_state);
	props->port_cap_flags	= MLX5_GET(hca_vport_context, ctx, cap_mask1);
	props->gid_tbl_len	= mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
	props->max_msg_sz	= 1 << MLX5_CAP_GEN(mdev, log_max_msg);
	props->pkey_tbl_len	= mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
	props->bad_pkey_cntr	= MLX5_GET(hca_vport_context, ctx,
					      pkey_violation_counter);
	props->qkey_viol_cntr	= MLX5_GET(hca_vport_context, ctx,
					      qkey_violation_counter);
	props->subnet_timeout	= MLX5_GET(hca_vport_context, ctx,
					      subnet_timeout);
	props->init_type_reply	= MLX5_GET(hca_vport_context, ctx,
					   init_type_reply);

	/* link width and speed come from the PTYS register */
	ptys->proto_mask |= MLX5_PTYS_IB;
	ptys->local_port = port;
	err = mlx5_core_access_ptys(mdev, ptys, 0);
	if (err)
		goto out;

	err = translate_active_width(ibdev, ptys->ib_link_width_oper,
				     &props->active_width);
	if (err)
		goto out;

	props->active_speed	= (u8)ptys->ib_proto_oper;

	/* max and operational MTU come from the PMTU register */
	pmtu->local_port = port;
	err = mlx5_core_access_pmtu(mdev, pmtu, 0);
	if (err)
		goto out;

	props->max_mtu		= pmtu->max_mtu;
	props->active_mtu	= pmtu->oper_mtu;

	/* VL capability comes from the PVLC register */
	memset(&pvlc, 0, sizeof(pvlc));
	pvlc.local_port = port;
	err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
	if (err)
		goto out;

	err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
				   &props->max_vl_num);
out:
	/* kvfree/kfree accept NULL, so partial allocation is safe here */
	kvfree(rep);
	kfree(ptys);
	kfree(pmtu);
	return err;
}
523322810Shselasky
524322810Shselaskyint mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
525322810Shselasky		       struct ib_port_attr *props)
526322810Shselasky{
527322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
528322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
529322810Shselasky		return mlx5_query_port_mad_ifc(ibdev, port, props);
530322810Shselasky
531322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
532322810Shselasky		return mlx5_query_port_ib(ibdev, port, props);
533322810Shselasky
534322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
535322810Shselasky		return mlx5_query_port_roce(ibdev, port, props);
536322810Shselasky
537322810Shselasky	default:
538322810Shselasky		return -EINVAL;
539322810Shselasky	}
540322810Shselasky}
541322810Shselasky
542322810Shselaskystatic inline int
543322810Shselaskymlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
544322810Shselasky{
545322810Shselasky	if (dev->if_addrlen != ETH_ALEN)
546322810Shselasky		return -1;
547322810Shselasky	memcpy(eui, IF_LLADDR(dev), 3);
548322810Shselasky	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
549322810Shselasky
550322810Shselasky	/* NOTE: The scope ID is added by the GID to IP conversion */
551322810Shselasky
552322810Shselasky	eui[3] = 0xFF;
553322810Shselasky	eui[4] = 0xFE;
554322810Shselasky	eui[0] ^= 2;
555322810Shselasky	return 0;
556322810Shselasky}
557322810Shselasky
558322810Shselaskystatic void
559322810Shselaskymlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
560322810Shselasky{
561322810Shselasky	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
562322810Shselasky	mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
563322810Shselasky}
564322810Shselasky
565322810Shselaskystatic inline int
566322810Shselaskymlx5_ip2gid(const struct sockaddr *addr, union ib_gid *gid)
567322810Shselasky{
568322810Shselasky	switch (addr->sa_family) {
569322810Shselasky	case AF_INET:
570322810Shselasky		ipv6_addr_set_v4mapped(((const struct sockaddr_in *)addr)->sin_addr.s_addr,
571322810Shselasky		    (struct in6_addr *)gid->raw);
572322810Shselasky		break;
573322810Shselasky	case AF_INET6:
574322810Shselasky		memcpy(gid->raw, &((const struct sockaddr_in6 *)addr)->sin6_addr, 16);
575322810Shselasky		/* clear SCOPE ID */
576322810Shselasky		gid->raw[2] = 0;
577322810Shselasky		gid->raw[3] = 0;
578322810Shselasky		break;
579322810Shselasky	default:
580322810Shselasky		return -EINVAL;
581322810Shselasky	}
582322810Shselasky	return 0;
583322810Shselasky}
584322810Shselasky
/*
 * Kthread body that keeps the RoCE GID table of a port in sync with
 * the addresses configured on the underlying network interface (and
 * its VLAN children).  It polls roughly once per second until
 * port->port_gone is set, then dispatches a final GID_CHANGE event,
 * sets port_gone to 2 to signal completion, and exits.
 */
static void
mlx5_ib_roce_port_update(void *arg)
{
	struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
	struct mlx5_ib_dev *dev = port->dev;
	struct mlx5_core_dev *mdev = dev->mdev;
	struct net_device *xdev[MLX5_IB_GID_MAX];	/* ifnet backing each GID slot */
	struct net_device *idev;
	struct net_device *ndev;
	struct ifaddr *ifa;
	union ib_gid gid_temp;

	while (port->port_gone == 0) {
		int update = 0;
		int gid_index = 0;
		int j;
		int error;

		/* the Ethernet ifnet may not be registered yet; retry later */
		ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
		if (ndev == NULL) {
			pause("W", hz);
			continue;
		}

		CURVNET_SET_QUIET(ndev->if_vnet);

		/* slot 0 always holds the default (link-local) GID */
		memset(&gid_temp, 0, sizeof(gid_temp));
		mlx5_make_default_gid(ndev, &gid_temp);
		if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
			port->gid_table[gid_index] = gid_temp;
			update = 1;
		}
		xdev[gid_index] = ndev;
		gid_index++;

		IFNET_RLOCK();
		/* make sure our ifnet is still on the global list */
		TAILQ_FOREACH(idev, &V_ifnet, if_link) {
			if (idev == ndev)
				break;
		}
		if (idev != NULL) {
		    /* walk all interfaces: ndev itself plus its VLANs */
		    TAILQ_FOREACH(idev, &V_ifnet, if_link) {
			if (idev != ndev) {
				if (idev->if_type != IFT_L2VLAN)
					continue;
				if (ndev != rdma_vlan_dev_real_dev(idev))
					continue;
			}
			/* clone address information for IPv4 and IPv6 */
			IF_ADDR_RLOCK(idev);
			TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
				if (ifa->ifa_addr == NULL ||
				    (ifa->ifa_addr->sa_family != AF_INET &&
				     ifa->ifa_addr->sa_family != AF_INET6) ||
				    gid_index >= MLX5_IB_GID_MAX)
					continue;
				memset(&gid_temp, 0, sizeof(gid_temp));
				mlx5_ip2gid(ifa->ifa_addr, &gid_temp);
				/* check for existing entry */
				for (j = 0; j != gid_index; j++) {
					if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
						break;
				}
				/* check if new entry must be added */
				if (j == gid_index) {
					if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
						port->gid_table[gid_index] = gid_temp;
						update = 1;
					}
					xdev[gid_index] = idev;
					gid_index++;
				}
			}
			IF_ADDR_RUNLOCK(idev);
		    }
		}
		IFNET_RUNLOCK();
		CURVNET_RESTORE();

		if (update != 0 &&
		    mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
			struct ib_event event = {
			    .device = &dev->ib_dev,
			    .element.port_num = port->port_num + 1,
			    .event = IB_EVENT_GID_CHANGE,
			};

			/* add new entries, if any */
			for (j = 0; j != gid_index; j++) {
				error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
				    port->gid_table + j, xdev[j]);
				if (error != 0)
					printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
			}
			memset(&gid_temp, 0, sizeof(gid_temp));

			/* clear old entries, if any */
			for (; j != MLX5_IB_GID_MAX; j++) {
				if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
					continue;
				port->gid_table[j] = gid_temp;
				(void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
				    port->gid_table + j, ndev);
			}

			/* make sure ibcore gets updated */
			ib_dispatch_event(&event);
		}
		pause("W", hz);
	}
	/* shutdown path: notify consumers one last time before exiting */
	do {
		struct ib_event event = {
			.device = &dev->ib_dev,
			.element.port_num = port->port_num + 1,
			.event = IB_EVENT_GID_CHANGE,
		};
		/* make sure ibcore gets updated */
		ib_dispatch_event(&event);

		/* wait a bit */
		pause("W", hz);
	} while (0);
	/* signal the teardown code that this thread is done */
	port->port_gone = 2;
	kthread_exit();
}
710322810Shselasky
711322810Shselaskystatic int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
712322810Shselasky			     union ib_gid *gid)
713322810Shselasky{
714322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
715322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
716322810Shselasky
717322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
718322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
719322810Shselasky		return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
720322810Shselasky
721322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
722322810Shselasky		return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
723322810Shselasky
724322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
725322810Shselasky		if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
726322810Shselasky		    index < 0 || index >= MLX5_IB_GID_MAX ||
727322810Shselasky		    dev->port[port - 1].port_gone != 0)
728322810Shselasky			memset(gid, 0, sizeof(*gid));
729322810Shselasky		else
730322810Shselasky			*gid = dev->port[port - 1].gid_table[index];
731322810Shselasky		return 0;
732322810Shselasky
733322810Shselasky	default:
734322810Shselasky		return -EINVAL;
735322810Shselasky	}
736322810Shselasky}
737322810Shselasky
738322810Shselaskystatic int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
739322810Shselasky			      u16 *pkey)
740322810Shselasky{
741322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
742322810Shselasky	struct mlx5_core_dev *mdev = dev->mdev;
743322810Shselasky
744322810Shselasky	switch (mlx5_get_vport_access_method(ibdev)) {
745322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_MAD:
746322810Shselasky		return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
747322810Shselasky
748322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_HCA:
749322810Shselasky	case MLX5_VPORT_ACCESS_METHOD_NIC:
750322810Shselasky		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
751322810Shselasky						 pkey);
752322810Shselasky
753322810Shselasky	default:
754322810Shselasky		return -EINVAL;
755322810Shselasky	}
756322810Shselasky}
757322810Shselasky
758322810Shselaskystatic int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
759322810Shselasky				 struct ib_device_modify *props)
760322810Shselasky{
761322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
762322810Shselasky	struct mlx5_reg_node_desc in;
763322810Shselasky	struct mlx5_reg_node_desc out;
764322810Shselasky	int err;
765322810Shselasky
766322810Shselasky	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
767322810Shselasky		return -EOPNOTSUPP;
768322810Shselasky
769322810Shselasky	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
770322810Shselasky		return 0;
771322810Shselasky
772322810Shselasky	/*
773322810Shselasky	 * If possible, pass node desc to FW, so it can generate
774322810Shselasky	 * a 144 trap.  If cmd fails, just ignore.
775322810Shselasky	 */
776322810Shselasky	memcpy(&in, props->node_desc, 64);
777322810Shselasky	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
778322810Shselasky				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
779322810Shselasky	if (err)
780322810Shselasky		return err;
781322810Shselasky
782322810Shselasky	memcpy(ibdev->node_desc, props->node_desc, 64);
783322810Shselasky
784322810Shselasky	return err;
785322810Shselasky}
786322810Shselasky
787322810Shselaskystatic int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
788322810Shselasky			       struct ib_port_modify *props)
789322810Shselasky{
790322810Shselasky	u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
791322810Shselasky		     IB_LINK_LAYER_ETHERNET);
792322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
793322810Shselasky	struct ib_port_attr attr;
794322810Shselasky	u32 tmp;
795322810Shselasky	int err;
796322810Shselasky
797322810Shselasky	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
798322810Shselasky	 * of whether port link layer is ETH or IB. For ETH ports, qkey
799322810Shselasky	 * violations and port capabilities are not valid.
800322810Shselasky	 */
801322810Shselasky	if (is_eth)
802322810Shselasky		return 0;
803322810Shselasky
804322810Shselasky	mutex_lock(&dev->cap_mask_mutex);
805322810Shselasky
806322810Shselasky	err = mlx5_ib_query_port(ibdev, port, &attr);
807322810Shselasky	if (err)
808322810Shselasky		goto out;
809322810Shselasky
810322810Shselasky	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
811322810Shselasky		~props->clr_port_cap_mask;
812322810Shselasky
813322810Shselasky	err = mlx5_set_port_caps(dev->mdev, port, tmp);
814322810Shselasky
815322810Shselaskyout:
816322810Shselasky	mutex_unlock(&dev->cap_mask_mutex);
817322810Shselasky	return err;
818322810Shselasky}
819322810Shselasky
/* Capability flag bits reported to user space in the ucontext response. */
enum mlx5_cap_flags {
	MLX5_CAP_COMPACT_AV = 1 << 0,	/* device supports compact address vectors */
};
823322810Shselasky
824322810Shselaskystatic void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
825322810Shselasky{
826322810Shselasky	*flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
827322810Shselasky		  MLX5_CAP_COMPACT_AV : 0;
828322810Shselasky}
829322810Shselasky
830322810Shselaskystatic struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
831322810Shselasky						  struct ib_udata *udata)
832322810Shselasky{
833322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
834322810Shselasky	struct mlx5_ib_alloc_ucontext_req_v2 req;
835322810Shselasky	struct mlx5_ib_alloc_ucontext_resp resp;
836322810Shselasky	struct mlx5_ib_ucontext *context;
837322810Shselasky	struct mlx5_uuar_info *uuari;
838322810Shselasky	struct mlx5_uar *uars;
839322810Shselasky	int gross_uuars;
840322810Shselasky	int num_uars;
841322810Shselasky	int ver;
842322810Shselasky	int uuarn;
843322810Shselasky	int err;
844322810Shselasky	int i;
845322810Shselasky	size_t reqlen;
846322810Shselasky
847322810Shselasky	if (!dev->ib_active)
848322810Shselasky		return ERR_PTR(-EAGAIN);
849322810Shselasky
850322810Shselasky	memset(&req, 0, sizeof(req));
851322810Shselasky	memset(&resp, 0, sizeof(resp));
852322810Shselasky
853322810Shselasky	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
854322810Shselasky	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
855322810Shselasky		ver = 0;
856322810Shselasky	else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
857322810Shselasky		ver = 2;
858322810Shselasky	else {
859322810Shselasky		mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", reqlen);
860322810Shselasky		return ERR_PTR(-EINVAL);
861322810Shselasky	}
862322810Shselasky
863322810Shselasky	err = ib_copy_from_udata(&req, udata, reqlen);
864322810Shselasky	if (err) {
865322810Shselasky		mlx5_ib_err(dev, "copy failed\n");
866322810Shselasky		return ERR_PTR(err);
867322810Shselasky	}
868322810Shselasky
869322810Shselasky	if (req.reserved) {
870322810Shselasky		mlx5_ib_err(dev, "request corrupted\n");
871322810Shselasky		return ERR_PTR(-EINVAL);
872322810Shselasky	}
873322810Shselasky
874322810Shselasky	if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
875322810Shselasky		mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
876322810Shselasky		return ERR_PTR(-ENOMEM);
877322810Shselasky	}
878322810Shselasky
879322810Shselasky	req.total_num_uuars = ALIGN(req.total_num_uuars,
880322810Shselasky				    MLX5_NON_FP_BF_REGS_PER_PAGE);
881322810Shselasky	if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
882322810Shselasky		mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
883322810Shselasky			     req.total_num_uuars, req.total_num_uuars);
884322810Shselasky		return ERR_PTR(-EINVAL);
885322810Shselasky	}
886322810Shselasky
887322810Shselasky	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
888322810Shselasky	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
889322810Shselasky	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
890322810Shselasky	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
891322810Shselasky		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
892322810Shselasky	resp.cache_line_size = L1_CACHE_BYTES;
893322810Shselasky	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
894322810Shselasky	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
895322810Shselasky	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
896322810Shselasky	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
897322810Shselasky	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
898322810Shselasky	set_mlx5_flags(&resp.flags, dev->mdev);
899322810Shselasky
900322810Shselasky	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
901322810Shselasky		resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
902322810Shselasky
903322810Shselasky	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
904322810Shselasky		resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
905322810Shselasky
906322810Shselasky	context = kzalloc(sizeof(*context), GFP_KERNEL);
907322810Shselasky	if (!context)
908322810Shselasky		return ERR_PTR(-ENOMEM);
909322810Shselasky
910322810Shselasky	uuari = &context->uuari;
911322810Shselasky	mutex_init(&uuari->lock);
912322810Shselasky	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
913322810Shselasky	if (!uars) {
914322810Shselasky		err = -ENOMEM;
915322810Shselasky		goto out_ctx;
916322810Shselasky	}
917322810Shselasky
918322810Shselasky	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
919322810Shselasky				sizeof(*uuari->bitmap),
920322810Shselasky				GFP_KERNEL);
921322810Shselasky	if (!uuari->bitmap) {
922322810Shselasky		err = -ENOMEM;
923322810Shselasky		goto out_uar_ctx;
924322810Shselasky	}
925322810Shselasky	/*
926322810Shselasky	 * clear all fast path uuars
927322810Shselasky	 */
928322810Shselasky	for (i = 0; i < gross_uuars; i++) {
929322810Shselasky		uuarn = i & 3;
930322810Shselasky		if (uuarn == 2 || uuarn == 3)
931322810Shselasky			set_bit(i, uuari->bitmap);
932322810Shselasky	}
933322810Shselasky
934322810Shselasky	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
935322810Shselasky	if (!uuari->count) {
936322810Shselasky		err = -ENOMEM;
937322810Shselasky		goto out_bitmap;
938322810Shselasky	}
939322810Shselasky
940322810Shselasky	for (i = 0; i < num_uars; i++) {
941322810Shselasky		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
942322810Shselasky		if (err) {
943322810Shselasky			mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
944322810Shselasky			goto out_uars;
945322810Shselasky		}
946322810Shselasky	}
947322810Shselasky	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
948322810Shselasky		context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
949322810Shselasky
950322810Shselasky	INIT_LIST_HEAD(&context->db_page_list);
951322810Shselasky	mutex_init(&context->db_page_mutex);
952322810Shselasky
953322810Shselasky	resp.tot_uuars = req.total_num_uuars;
954322810Shselasky	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
955322810Shselasky	err = ib_copy_to_udata(udata, &resp,
956322810Shselasky			       min_t(size_t, udata->outlen, sizeof(resp)));
957322810Shselasky	if (err)
958322810Shselasky		goto out_uars;
959322810Shselasky
960322810Shselasky	uuari->ver = ver;
961322810Shselasky	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
962322810Shselasky	uuari->uars = uars;
963322810Shselasky	uuari->num_uars = num_uars;
964322810Shselasky
965322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
966322810Shselasky	    IB_LINK_LAYER_ETHERNET) {
967322810Shselasky		err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
968322810Shselasky		if (err)
969322810Shselasky			goto out_uars;
970322810Shselasky	}
971322810Shselasky
972322810Shselasky	return &context->ibucontext;
973322810Shselasky
974322810Shselaskyout_uars:
975322810Shselasky	for (i--; i >= 0; i--)
976322810Shselasky		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
977322810Shselasky	kfree(uuari->count);
978322810Shselasky
979322810Shselaskyout_bitmap:
980322810Shselasky	kfree(uuari->bitmap);
981322810Shselasky
982322810Shselaskyout_uar_ctx:
983322810Shselasky	kfree(uars);
984322810Shselasky
985322810Shselaskyout_ctx:
986322810Shselasky	kfree(context);
987322810Shselasky	return ERR_PTR(err);
988322810Shselasky}
989322810Shselasky
990322810Shselaskystatic int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
991322810Shselasky{
992322810Shselasky	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
993322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
994322810Shselasky	struct mlx5_uuar_info *uuari = &context->uuari;
995322810Shselasky	int i;
996322810Shselasky
997322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
998322810Shselasky	    IB_LINK_LAYER_ETHERNET)
999322810Shselasky		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
1000322810Shselasky
1001322810Shselasky	for (i = 0; i < uuari->num_uars; i++) {
1002322810Shselasky		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
1003322810Shselasky			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
1004322810Shselasky	}
1005322810Shselasky	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
1006322810Shselasky		if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
1007322810Shselasky			mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
1008322810Shselasky	}
1009322810Shselasky
1010322810Shselasky	kfree(uuari->count);
1011322810Shselasky	kfree(uuari->bitmap);
1012322810Shselasky	kfree(uuari->uars);
1013322810Shselasky	kfree(context);
1014322810Shselasky
1015322810Shselasky	return 0;
1016322810Shselasky}
1017322810Shselasky
1018322810Shselaskystatic phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
1019322810Shselasky{
1020322810Shselasky	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
1021322810Shselasky}
1022322810Shselasky
1023322810Shselaskystatic int get_command(unsigned long offset)
1024322810Shselasky{
1025322810Shselasky	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1026322810Shselasky}
1027322810Shselasky
1028322810Shselaskystatic int get_arg(unsigned long offset)
1029322810Shselasky{
1030322810Shselasky	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1031322810Shselasky}
1032322810Shselasky
/* For page-mapping commands the argument is the UAR index. */
static int get_index(unsigned long offset)
{
	return get_arg(offset);
}
1037322810Shselasky
/*
 * Map one UAR page into the calling process's address space.
 *
 * "prot" is the page protection to apply (caller chooses write-combining
 * or non-cached); "is_wc" is used only for the debug printout.  The UAR
 * index comes from the low bits of the vma's page offset.
 *
 * Returns 0 on success, -EINVAL on a bad size or index, or -EAGAIN if
 * the physical remap fails.
 */
static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
		    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
		    struct mlx5_ib_ucontext *context)
{
	unsigned long idx;
	phys_addr_t pfn;

	/* A UAR mapping must cover exactly one page. */
	if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
		mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
			     (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
		return -EINVAL;
	}

	/* The requested index must refer to a UAR owned by this context. */
	idx = get_index(vma->vm_pgoff);
	if (idx >= uuari->num_uars) {
		mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
			     idx, uuari->num_uars);
		return -EINVAL;
	}

	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
		    (unsigned long long)pfn);

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
			       PAGE_SIZE, vma->vm_page_prot)) {
		mlx5_ib_err(dev, "io remap failed\n");
		return -EAGAIN;
	}

	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
		    (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);

	return 0;
}
1074322810Shselasky
1075322810Shselaskystatic int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1076322810Shselasky{
1077322810Shselasky	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1078322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1079322810Shselasky	struct mlx5_uuar_info *uuari = &context->uuari;
1080322810Shselasky	unsigned long command;
1081322810Shselasky
1082322810Shselasky	command = get_command(vma->vm_pgoff);
1083322810Shselasky	switch (command) {
1084322810Shselasky	case MLX5_IB_MMAP_REGULAR_PAGE:
1085322810Shselasky		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1086322810Shselasky				true,
1087322810Shselasky				uuari, dev, context);
1088322810Shselasky
1089322810Shselasky		break;
1090322810Shselasky
1091322810Shselasky	case MLX5_IB_MMAP_WC_PAGE:
1092322810Shselasky		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1093322810Shselasky				true, uuari, dev, context);
1094322810Shselasky		break;
1095322810Shselasky
1096322810Shselasky	case MLX5_IB_MMAP_NC_PAGE:
1097322810Shselasky		return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
1098322810Shselasky				false, uuari, dev, context);
1099322810Shselasky		break;
1100322810Shselasky
1101322810Shselasky	default:
1102322810Shselasky		return -EINVAL;
1103322810Shselasky	}
1104322810Shselasky
1105322810Shselasky	return 0;
1106322810Shselasky}
1107322810Shselasky
/*
 * Create a physical-address memory key on protection domain "pdn" for
 * kernel use: local-read permission, PA access mode, 64-bit length,
 * zero start address.  On success the key is stored in *key and 0 is
 * returned; otherwise a negative errno.
 */
static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
{
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_core_mr mr;
	int err;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	seg = &in->seg;
	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
	/* 0xffffff presumably means "not bound to a specific QP";
	 * low byte (key variant) is 0 — TODO confirm against PRM */
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
		goto err_in;
	}

	kfree(in);
	*key = mr.key;

	return 0;

err_in:
	kfree(in);

	return err;
}
1142322810Shselasky
1143322810Shselaskystatic void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1144322810Shselasky{
1145322810Shselasky	struct mlx5_core_mr mr;
1146322810Shselasky	int err;
1147322810Shselasky
1148322810Shselasky	memset(&mr, 0, sizeof(mr));
1149322810Shselasky	mr.key = key;
1150322810Shselasky	err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1151322810Shselasky	if (err)
1152322810Shselasky		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
1153322810Shselasky}
1154322810Shselasky
1155322810Shselaskystatic struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1156322810Shselasky				      struct ib_ucontext *context,
1157322810Shselasky				      struct ib_udata *udata)
1158322810Shselasky{
1159322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1160322810Shselasky	struct mlx5_ib_alloc_pd_resp resp;
1161322810Shselasky	struct mlx5_ib_pd *pd;
1162322810Shselasky	int err;
1163322810Shselasky
1164322810Shselasky	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1165322810Shselasky	if (!pd)
1166322810Shselasky		return ERR_PTR(-ENOMEM);
1167322810Shselasky
1168322810Shselasky	err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1169322810Shselasky	if (err) {
1170322810Shselasky		mlx5_ib_warn(dev, "pd alloc failed\n");
1171322810Shselasky		kfree(pd);
1172322810Shselasky		return ERR_PTR(err);
1173322810Shselasky	}
1174322810Shselasky
1175322810Shselasky	if (context) {
1176322810Shselasky		resp.pdn = pd->pdn;
1177322810Shselasky		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1178322810Shselasky			mlx5_ib_err(dev, "copy failed\n");
1179322810Shselasky			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1180322810Shselasky			kfree(pd);
1181322810Shselasky			return ERR_PTR(-EFAULT);
1182322810Shselasky		}
1183322810Shselasky	} else {
1184322810Shselasky		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1185322810Shselasky		if (err) {
1186322810Shselasky			mlx5_ib_err(dev, "alloc mkey failed\n");
1187322810Shselasky			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1188322810Shselasky			kfree(pd);
1189322810Shselasky			return ERR_PTR(err);
1190322810Shselasky		}
1191322810Shselasky	}
1192322810Shselasky
1193322810Shselasky	return &pd->ibpd;
1194322810Shselasky}
1195322810Shselasky
1196322810Shselaskystatic int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1197322810Shselasky{
1198322810Shselasky	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1199322810Shselasky	struct mlx5_ib_pd *mpd = to_mpd(pd);
1200322810Shselasky
1201322810Shselasky	if (!pd->uobject)
1202322810Shselasky		free_pa_mkey(mdev, mpd->pa_lkey);
1203322810Shselasky
1204322810Shselasky	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1205322810Shselasky	kfree(mpd);
1206322810Shselasky
1207322810Shselasky	return 0;
1208322810Shselasky}
1209322810Shselasky
1210322810Shselaskystatic int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1211322810Shselasky{
1212322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1213322810Shselasky	int err;
1214322810Shselasky
1215322810Shselasky	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1216322810Shselasky		err = -EOPNOTSUPP;
1217322810Shselasky	else
1218322810Shselasky		err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1219322810Shselasky	if (err)
1220322810Shselasky		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1221322810Shselasky			     ibqp->qp_num, gid->raw);
1222322810Shselasky
1223322810Shselasky	return err;
1224322810Shselasky}
1225322810Shselasky
1226322810Shselaskystatic int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1227322810Shselasky{
1228322810Shselasky	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1229322810Shselasky	int err;
1230322810Shselasky
1231322810Shselasky	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1232322810Shselasky		err = -EOPNOTSUPP;
1233322810Shselasky	else
1234322810Shselasky		err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1235322810Shselasky	if (err)
1236322810Shselasky		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1237322810Shselasky			     ibqp->qp_num, gid->raw);
1238322810Shselasky
1239322810Shselasky	return err;
1240322810Shselasky}
1241322810Shselasky
1242322810Shselaskystatic int init_node_data(struct mlx5_ib_dev *dev)
1243322810Shselasky{
1244322810Shselasky	int err;
1245322810Shselasky
1246322810Shselasky	err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1247322810Shselasky	if (err)
1248322810Shselasky		return err;
1249322810Shselasky
1250322810Shselasky	return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1251322810Shselasky}
1252322810Shselasky
1253322810Shselaskystatic ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1254322810Shselasky			     char *buf)
1255322810Shselasky{
1256322810Shselasky	struct mlx5_ib_dev *dev =
1257322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1258322810Shselasky
1259322810Shselasky	return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
1260322810Shselasky}
1261322810Shselasky
1262322810Shselaskystatic ssize_t show_reg_pages(struct device *device,
1263322810Shselasky			      struct device_attribute *attr, char *buf)
1264322810Shselasky{
1265322810Shselasky	struct mlx5_ib_dev *dev =
1266322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1267322810Shselasky
1268322810Shselasky	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1269322810Shselasky}
1270322810Shselasky
1271322810Shselaskystatic ssize_t show_hca(struct device *device, struct device_attribute *attr,
1272322810Shselasky			char *buf)
1273322810Shselasky{
1274322810Shselasky	struct mlx5_ib_dev *dev =
1275322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1276322810Shselasky	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1277322810Shselasky}
1278322810Shselasky
1279322810Shselaskystatic ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1280322810Shselasky			   char *buf)
1281322810Shselasky{
1282322810Shselasky	struct mlx5_ib_dev *dev =
1283322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1284322810Shselasky	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1285322810Shselasky		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1286322810Shselasky}
1287322810Shselasky
1288322810Shselaskystatic ssize_t show_rev(struct device *device, struct device_attribute *attr,
1289322810Shselasky			char *buf)
1290322810Shselasky{
1291322810Shselasky	struct mlx5_ib_dev *dev =
1292322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1293322810Shselasky	return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
1294322810Shselasky}
1295322810Shselasky
1296322810Shselaskystatic ssize_t show_board(struct device *device, struct device_attribute *attr,
1297322810Shselasky			  char *buf)
1298322810Shselasky{
1299322810Shselasky	struct mlx5_ib_dev *dev =
1300322810Shselasky		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1301322810Shselasky	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1302322810Shselasky		       dev->mdev->board_id);
1303322810Shselasky}
1304322810Shselasky
/* Read-only sysfs attributes exposed for each mlx5 IB device. */
static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);

/* Table collecting the attributes above (registration site not in view). */
static struct device_attribute *mlx5_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_fw_ver,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_fw_pages,
	&dev_attr_reg_pages,
};
1320322810Shselasky
/*
 * Reset-flow handler for a fatal device error: scan every QP on the
 * device and, for each one with outstanding send or receive work,
 * queue its CQ (at most once, via reset_notify_added) and finally
 * invoke each queued CQ's completion handler so consumers can drain.
 */
static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
	struct mlx5_ib_qp *mqp;
	struct mlx5_ib_cq *send_mcq, *recv_mcq;
	struct mlx5_core_cq *mcq;
	struct list_head cq_armed_list;
	unsigned long flags_qp;
	unsigned long flags_cq;
	unsigned long flags;

	mlx5_ib_warn(ibdev, " started\n");
	INIT_LIST_HEAD(&cq_armed_list);

	/* Go over qp list reside on that ibdev, sync with create/destroy qp.*/
	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
		/* Pending sends: queue the send CQ for completion. */
		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
		if (mqp->sq.tail != mqp->sq.head) {
			send_mcq = to_mcq(mqp->ibqp.send_cq);
			spin_lock_irqsave(&send_mcq->lock, flags_cq);
			if (send_mcq->mcq.comp &&
			    mqp->ibqp.send_cq->comp_handler) {
				/* reset_notify_added guards against double-queuing */
				if (!send_mcq->mcq.reset_notify_added) {
					send_mcq->mcq.reset_notify_added = 1;
					list_add_tail(&send_mcq->mcq.reset_notify,
						      &cq_armed_list);
				}
			}
			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
		}
		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
		/* no handling is needed for SRQ */
		if (!mqp->ibqp.srq) {
			/* Pending receives: queue the receive CQ as well. */
			if (mqp->rq.tail != mqp->rq.head) {
				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
				if (recv_mcq->mcq.comp &&
				    mqp->ibqp.recv_cq->comp_handler) {
					if (!recv_mcq->mcq.reset_notify_added) {
						recv_mcq->mcq.reset_notify_added = 1;
						list_add_tail(&recv_mcq->mcq.reset_notify,
							      &cq_armed_list);
					}
				}
				spin_unlock_irqrestore(&recv_mcq->lock,
						       flags_cq);
			}
		}
		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
	}
	/*At that point all inflight post send were put to be executed as of we
	 * lock/unlock above locks Now need to arm all involved CQs.
	 */
	list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
		mcq->comp(mcq);
	}
	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
	mlx5_ib_warn(ibdev, " ended\n");
	return;
}
1382322810Shselasky
/*
 * mlx5 core -> ibcore event demultiplexer.  Translates core device
 * events into ib_event notifications; "param" carries the 1-based port
 * number for port-scoped events.  Events for out-of-range ports are
 * logged and dropped, and nothing is dispatched once ib_active is
 * cleared.
 */
static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
			  enum mlx5_dev_event event, unsigned long param)
{
	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
	struct ib_event ibev;

	u8 port = 0;

	switch (event) {
	case MLX5_DEV_EVENT_SYS_ERROR:
		/* Fatal: stop accepting work, then flush in-flight CQs. */
		ibdev->ib_active = false;
		ibev.event = IB_EVENT_DEVICE_FATAL;
		mlx5_ib_handle_internal_error(ibdev);
		break;

	case MLX5_DEV_EVENT_PORT_UP:
		ibev.event = IB_EVENT_PORT_ACTIVE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_PORT_DOWN:
	case MLX5_DEV_EVENT_PORT_INITIALIZED:
		/* An initialized-but-not-active port is reported as PORT_ERR. */
		ibev.event = IB_EVENT_PORT_ERR;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_LID_CHANGE:
		ibev.event = IB_EVENT_LID_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_PKEY_CHANGE:
		ibev.event = IB_EVENT_PKEY_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_GUID_CHANGE:
		ibev.event = IB_EVENT_GID_CHANGE;
		port = (u8)param;
		break;

	case MLX5_DEV_EVENT_CLIENT_REREG:
		ibev.event = IB_EVENT_CLIENT_REREGISTER;
		port = (u8)param;
		break;

	default:
		/*
		 * NOTE(review): ibev.event is left uninitialized here; this is
		 * harmless only because port stays 0 and the range check below
		 * returns before the event is dispatched.
		 */
		break;
	}

	ibev.device	      = &ibdev->ib_dev;
	ibev.element.port_num = port;

	/* SYS_ERROR is device-scoped; all other events need a valid port. */
	if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
	    (port < 1 || port > ibdev->num_ports)) {
		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
		return;
	}

	if (ibdev->ib_active)
		ib_dispatch_event(&ibev);
}
1445322810Shselasky
1446322810Shselaskystatic void get_ext_port_caps(struct mlx5_ib_dev *dev)
1447322810Shselasky{
1448322810Shselasky	int port;
1449322810Shselasky
1450322810Shselasky	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1451322810Shselasky		mlx5_query_ext_port_caps(dev, port);
1452322810Shselasky}
1453322810Shselasky
/*
 * Record the device's atomic capability and enable the atomic
 * responder.  The capability test below is compiled out (#if 0), so
 * the responder is enabled unconditionally regardless of props'
 * reported atomic_cap.
 */
static void config_atomic_responder(struct mlx5_ib_dev *dev,
				    struct ib_device_attr *props)
{
	enum ib_atomic_cap cap = props->atomic_cap;

#if 0
	if (cap == IB_ATOMIC_HCA ||
	    cap == IB_ATOMIC_GLOB)
#endif
		dev->enable_atomic_resp = 1;

	dev->atomic_cap = cap;
}
1467322810Shselasky
/*
 * Buffer address alignment requirements, in bytes.
 * NOTE(review): no user is visible in this part of the file —
 * presumably consumed elsewhere in the driver.
 */
enum mlx5_addr_align {
	MLX5_ADDR_ALIGN_0	= 0,
	MLX5_ADDR_ALIGN_64	= 64,
	MLX5_ADDR_ALIGN_128	= 128,
};
1473322810Shselasky
/*
 * Query device- and per-port attributes and cache the pkey/gid table
 * sizes in mdev->port_caps[].  Also configures the atomic responder
 * from the queried device attributes.
 *
 * Returns 0 on success or a negative errno.  Note that a failing
 * per-port query breaks out of the loop and that error is returned.
 */
static int get_port_caps(struct mlx5_ib_dev *dev)
{
	struct ib_device_attr *dprops = NULL;
	struct ib_port_attr *pprops = NULL;
	int err = -ENOMEM;
	int port;

	/* Attr structs are large; allocate rather than using the stack. */
	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
	if (!pprops)
		goto out;

	dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
	if (!dprops)
		goto out;

	err = mlx5_ib_query_device(&dev->ib_dev, dprops);
	if (err) {
		mlx5_ib_warn(dev, "query_device failed %d\n", err);
		goto out;
	}
	config_atomic_responder(dev, dprops);

	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
		if (err) {
			mlx5_ib_warn(dev, "query_port %d failed %d\n",
				     port, err);
			break;
		}
		/* port_caps[] is 0-based while ports are 1-based. */
		dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
		dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
			    dprops->max_pkeys, pprops->gid_tbl_len);
	}

out:
	/* kfree(NULL) is a no-op, so partial allocation is fine here. */
	kfree(pprops);
	kfree(dprops);

	return err;
}
1515322810Shselasky
1516322810Shselaskystatic void destroy_umrc_res(struct mlx5_ib_dev *dev)
1517322810Shselasky{
1518322810Shselasky	int err;
1519322810Shselasky
1520322810Shselasky	err = mlx5_mr_cache_cleanup(dev);
1521322810Shselasky	if (err)
1522322810Shselasky		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1523322810Shselasky
1524322810Shselasky	ib_dereg_mr(dev->umrc.mr);
1525322810Shselasky	ib_dealloc_pd(dev->umrc.pd);
1526322810Shselasky}
1527322810Shselasky
/* Upper bound on outstanding UMR work requests. */
enum {
	MAX_UMR_WR = 128,
};
1531322810Shselasky
1532322810Shselaskystatic int create_umr_res(struct mlx5_ib_dev *dev)
1533322810Shselasky{
1534322810Shselasky	struct ib_pd *pd;
1535322810Shselasky	struct ib_mr *mr;
1536322810Shselasky	int ret;
1537322810Shselasky
1538322810Shselasky	pd = ib_alloc_pd(&dev->ib_dev);
1539322810Shselasky	if (IS_ERR(pd)) {
1540322810Shselasky		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1541322810Shselasky		ret = PTR_ERR(pd);
1542322810Shselasky		goto error_0;
1543322810Shselasky	}
1544322810Shselasky
1545322810Shselasky	mr = ib_get_dma_mr(pd,  IB_ACCESS_LOCAL_WRITE);
1546322810Shselasky	if (IS_ERR(mr)) {
1547322810Shselasky		mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1548322810Shselasky		ret = PTR_ERR(mr);
1549322810Shselasky		goto error_1;
1550322810Shselasky	}
1551322810Shselasky
1552322810Shselasky	dev->umrc.mr = mr;
1553322810Shselasky	dev->umrc.pd = pd;
1554322810Shselasky
1555322810Shselasky	ret = mlx5_mr_cache_init(dev);
1556322810Shselasky	if (ret) {
1557322810Shselasky		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1558322810Shselasky		goto error_4;
1559322810Shselasky	}
1560322810Shselasky
1561322810Shselasky	return 0;
1562322810Shselasky
1563322810Shselaskyerror_4:
1564322810Shselasky	ib_dereg_mr(mr);
1565322810Shselaskyerror_1:
1566322810Shselasky	ib_dealloc_pd(pd);
1567322810Shselaskyerror_0:
1568322810Shselasky	return ret;
1569322810Shselasky}
1570322810Shselasky
/*
 * Create the device-global verbs resources shared by internal users:
 * PD p0, CQ c0, XRC domains x0/x1, an XRC SRQ s0 and a basic SRQ s1.
 * The ib_* objects are created through the driver's own verbs entry
 * points with a NULL ucontext/udata (kernel consumers), so the common
 * fields the IB core would normally fill in are initialized by hand
 * here.  On error, everything created so far is unwound in reverse
 * order and a negative errno is returned.
 */
static int create_dev_resources(struct mlx5_ib_resources *devr)
{
	struct ib_srq_init_attr attr;
	struct mlx5_ib_dev *dev;
	int ret = 0;
	struct ib_cq_init_attr cq_attr = { .cqe = 1 };

	dev = container_of(devr, struct mlx5_ib_dev, devr);

	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->p0)) {
		ret = PTR_ERR(devr->p0);
		goto error0;
	}
	/* Kernel-owned PD: no uobject, refcount starts at zero. */
	devr->p0->device  = &dev->ib_dev;
	devr->p0->uobject = NULL;
	atomic_set(&devr->p0->usecnt, 0);

	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
	if (IS_ERR(devr->c0)) {
		ret = PTR_ERR(devr->c0);
		goto error1;
	}
	devr->c0->device        = &dev->ib_dev;
	devr->c0->uobject       = NULL;
	devr->c0->comp_handler  = NULL;
	devr->c0->event_handler = NULL;
	devr->c0->cq_context    = NULL;
	atomic_set(&devr->c0->usecnt, 0);

	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x0)) {
		ret = PTR_ERR(devr->x0);
		goto error2;
	}
	devr->x0->device = &dev->ib_dev;
	devr->x0->inode = NULL;
	atomic_set(&devr->x0->usecnt, 0);
	mutex_init(&devr->x0->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);

	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
	if (IS_ERR(devr->x1)) {
		ret = PTR_ERR(devr->x1);
		goto error3;
	}
	devr->x1->device = &dev->ib_dev;
	devr->x1->inode = NULL;
	atomic_set(&devr->x1->usecnt, 0);
	mutex_init(&devr->x1->tgt_qp_mutex);
	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);

	/* s0: minimal XRC SRQ bound to c0/x0. */
	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_XRC;
	attr.ext.xrc.cq = devr->c0;
	attr.ext.xrc.xrcd = devr->x0;

	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s0)) {
		ret = PTR_ERR(devr->s0);
		goto error4;
	}
	devr->s0->device	= &dev->ib_dev;
	devr->s0->pd		= devr->p0;
	devr->s0->uobject       = NULL;
	devr->s0->event_handler = NULL;
	devr->s0->srq_context   = NULL;
	devr->s0->srq_type      = IB_SRQT_XRC;
	devr->s0->ext.xrc.xrcd  = devr->x0;
	devr->s0->ext.xrc.cq	= devr->c0;
	/* s0 pins x0, c0 and p0. */
	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s0->usecnt, 0);

	/* s1: minimal basic (non-XRC) SRQ on the same PD. */
	memset(&attr, 0, sizeof(attr));
	attr.attr.max_sge = 1;
	attr.attr.max_wr = 1;
	attr.srq_type = IB_SRQT_BASIC;
	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
	if (IS_ERR(devr->s1)) {
		ret = PTR_ERR(devr->s1);
		goto error5;
	}
	devr->s1->device	= &dev->ib_dev;
	devr->s1->pd		= devr->p0;
	devr->s1->uobject       = NULL;
	devr->s1->event_handler = NULL;
	devr->s1->srq_context   = NULL;
	devr->s1->srq_type      = IB_SRQT_BASIC;
	devr->s1->ext.xrc.cq	= devr->c0;
	atomic_inc(&devr->p0->usecnt);
	atomic_set(&devr->s1->usecnt, 0);

	return 0;

error5:
	mlx5_ib_destroy_srq(devr->s0);
error4:
	mlx5_ib_dealloc_xrcd(devr->x1);
error3:
	mlx5_ib_dealloc_xrcd(devr->x0);
error2:
	mlx5_ib_destroy_cq(devr->c0);
error1:
	mlx5_ib_dealloc_pd(devr->p0);
error0:
	return ret;
}
1682322810Shselasky
/*
 * Release the resources created by create_dev_resources().  SRQs are
 * destroyed first since s0 holds references on x0, c0 and p0 and s1
 * holds a reference on p0.
 */
static void destroy_dev_resources(struct mlx5_ib_resources *devr)
{
	mlx5_ib_destroy_srq(devr->s1);
	mlx5_ib_destroy_srq(devr->s0);
	mlx5_ib_dealloc_xrcd(devr->x0);
	mlx5_ib_dealloc_xrcd(devr->x1);
	mlx5_ib_destroy_cq(devr->c0);
	mlx5_ib_dealloc_pd(devr->p0);
}
1692322810Shselasky
1693322810Shselaskystatic void enable_dc_tracer(struct mlx5_ib_dev *dev)
1694322810Shselasky{
1695322810Shselasky	struct device *device = dev->ib_dev.dma_device;
1696322810Shselasky	struct mlx5_dc_tracer *dct = &dev->dctr;
1697322810Shselasky	int order;
1698322810Shselasky	void *tmp;
1699322810Shselasky	int size;
1700322810Shselasky	int err;
1701322810Shselasky
1702322810Shselasky	size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1703322810Shselasky	if (size <= PAGE_SIZE)
1704322810Shselasky		order = 0;
1705322810Shselasky	else
1706322810Shselasky		order = 1;
1707322810Shselasky
1708322810Shselasky	dct->pg = alloc_pages(GFP_KERNEL, order);
1709322810Shselasky	if (!dct->pg) {
1710322810Shselasky		mlx5_ib_err(dev, "failed to allocate %d pages\n", order);
1711322810Shselasky		return;
1712322810Shselasky	}
1713322810Shselasky
1714322810Shselasky	tmp = page_address(dct->pg);
1715322810Shselasky	memset(tmp, 0xff, size);
1716322810Shselasky
1717322810Shselasky	dct->size = size;
1718322810Shselasky	dct->order = order;
1719322810Shselasky	dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1720322810Shselasky	if (dma_mapping_error(device, dct->dma)) {
1721322810Shselasky		mlx5_ib_err(dev, "dma mapping error\n");
1722322810Shselasky		goto map_err;
1723322810Shselasky	}
1724322810Shselasky
1725322810Shselasky	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1726322810Shselasky	if (err) {
1727322810Shselasky		mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1728322810Shselasky		goto cmd_err;
1729322810Shselasky	}
1730322810Shselasky
1731322810Shselasky	return;
1732322810Shselasky
1733322810Shselaskycmd_err:
1734322810Shselasky	dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1735322810Shselaskymap_err:
1736322810Shselasky	__free_pages(dct->pg, dct->order);
1737322810Shselasky	dct->pg = NULL;
1738322810Shselasky}
1739322810Shselasky
1740322810Shselaskystatic void disable_dc_tracer(struct mlx5_ib_dev *dev)
1741322810Shselasky{
1742322810Shselasky	struct device *device = dev->ib_dev.dma_device;
1743322810Shselasky	struct mlx5_dc_tracer *dct = &dev->dctr;
1744322810Shselasky	int err;
1745322810Shselasky
1746322810Shselasky	if (!dct->pg)
1747322810Shselasky		return;
1748322810Shselasky
1749322810Shselasky	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1750322810Shselasky	if (err) {
1751322810Shselasky		mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1752322810Shselasky		return;
1753322810Shselasky	}
1754322810Shselasky
1755322810Shselasky	dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1756322810Shselasky	__free_pages(dct->pg, dct->order);
1757322810Shselasky	dct->pg = NULL;
1758322810Shselasky}
1759322810Shselasky
/* DC CNAK handling parameters. */
enum {
	MLX5_DC_CNAK_SIZE		= 128,	/* bytes per CNAK buffer */
	MLX5_NUM_BUF_IN_PAGE		= PAGE_SIZE / MLX5_DC_CNAK_SIZE,
	MLX5_CNAK_TX_CQ_SIGNAL_FACTOR	= 128,	/* signal every Nth send */
	MLX5_DC_CNAK_SL			= 0,	/* service level for CNAKs */
	MLX5_DC_CNAK_VL			= 0,	/* virtual lane for CNAKs */
};
1767322810Shselasky
1768322810Shselaskystatic int init_dc_improvements(struct mlx5_ib_dev *dev)
1769322810Shselasky{
1770322810Shselasky	if (!mlx5_core_is_pf(dev->mdev))
1771322810Shselasky		return 0;
1772322810Shselasky
1773322810Shselasky	if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1774322810Shselasky		return 0;
1775322810Shselasky
1776322810Shselasky	enable_dc_tracer(dev);
1777322810Shselasky
1778322810Shselasky	return 0;
1779322810Shselasky}
1780322810Shselasky
/*
 * Counterpart of init_dc_improvements(); disable_dc_tracer() is safe
 * to call even when the tracer was never enabled.
 */
static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
{
	disable_dc_tracer(dev);
}
1786322810Shselasky
1787322810Shselaskystatic void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1788322810Shselasky{
1789322810Shselasky	mlx5_vport_dealloc_q_counter(dev->mdev,
1790322810Shselasky				     MLX5_INTERFACE_PROTOCOL_IB,
1791322810Shselasky				     dev->port[port_num].q_cnt_id);
1792322810Shselasky	dev->port[port_num].q_cnt_id = 0;
1793322810Shselasky}
1794322810Shselasky
1795322810Shselaskystatic void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1796322810Shselasky{
1797322810Shselasky	unsigned int i;
1798322810Shselasky
1799322810Shselasky	for (i = 0; i < dev->num_ports; i++)
1800322810Shselasky		mlx5_ib_dealloc_q_port_counter(dev, i);
1801322810Shselasky}
1802322810Shselasky
/*
 * Allocate one firmware queue counter per port, storing the ids in
 * dev->port[].q_cnt_id.  On failure, counters allocated so far are
 * released and the errno from the failing allocation is returned.
 */
static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
{
	int i;
	int ret;

	for (i = 0; i < dev->num_ports; i++) {
		ret = mlx5_vport_alloc_q_counter(dev->mdev,
						 MLX5_INTERFACE_PROTOCOL_IB,
						 &dev->port[i].q_cnt_id);
		if (ret) {
			mlx5_ib_warn(dev,
				     "couldn't allocate queue counter for port %d\n",
				     i + 1);
			goto dealloc_counters;
		}
	}

	return 0;

dealloc_counters:
	/* Unwind only the ports that were successfully allocated. */
	while (--i >= 0)
		mlx5_ib_dealloc_q_port_counter(dev, i);

	return ret;
}
1828322810Shselasky
/*
 * Generic per-port sysfs attribute: show/store receive the owning
 * mlx5_ib_port instead of a raw kobject.
 */
struct port_attribute {
	struct attribute attr;
	ssize_t (*show)(struct mlx5_ib_port *,
			struct port_attribute *, char *buf);
	ssize_t (*store)(struct mlx5_ib_port *,
			 struct port_attribute *,
			 const char *buf, size_t count);
};

/*
 * A port attribute backed by a queue-counter field; offset is the
 * byte offset of the counter inside the query_q_counter_out mailbox.
 */
struct port_counter_attribute {
	struct port_attribute	attr;
	size_t			offset;
};
1842322810Shselasky
/*
 * sysfs_ops.show adapter: recover the mlx5_ib_port from the kobject
 * (kobj -> sysfs group -> port) and forward to the attribute's own
 * show handler.
 */
static ssize_t port_attr_show(struct kobject *kobj,
			      struct attribute *attr, char *buf)
{
	struct port_attribute *port_attr =
		container_of(attr, struct port_attribute, attr);
	struct mlx5_ib_port_sysfs_group *p =
		container_of(kobj, struct mlx5_ib_port_sysfs_group,
			     kobj);
	struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
						    group);

	if (!port_attr->show)
		return -EIO;

	return port_attr->show(mibport, port_attr, buf);
}
1859322810Shselasky
/*
 * Show handler for a single queue counter: query the port's counter
 * set from firmware and print the 32-bit big-endian field at the
 * attribute's mailbox offset as a decimal value.
 */
static ssize_t show_port_counter(struct mlx5_ib_port *p,
				 struct port_attribute *port_attr,
				 char *buf)
{
	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
	struct port_counter_attribute *counter_attr =
		container_of(port_attr, struct port_counter_attribute, attr);
	void *out;
	int ret;

	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	ret = mlx5_vport_query_q_counter(p->dev->mdev,
					 p->q_cnt_id, 0,
					 out, outlen);
	if (ret)
		goto free;

	ret = sprintf(buf, "%d\n",
		      be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));

free:
	/*
	 * NOTE(review): buffer came from mlx5_vzalloc() but is freed
	 * with kfree() — verify this pairing is valid in this kpi
	 * (Linux would require kvfree here).
	 */
	kfree(out);
	return ret;
}
1887322810Shselasky
/*
 * Define a read-only sysfs counter attribute named after the
 * corresponding query_q_counter_out mailbox field.
 */
#define PORT_COUNTER_ATTR(_name)					\
struct port_counter_attribute port_counter_attr_##_name = {		\
	.attr  = __ATTR(_name, S_IRUGO, show_port_counter, NULL),	\
	.offset = MLX5_BYTE_OFF(query_q_counter_out, _name)		\
}

/* One attribute per exported firmware queue counter. */
static PORT_COUNTER_ATTR(rx_write_requests);
static PORT_COUNTER_ATTR(rx_read_requests);
static PORT_COUNTER_ATTR(rx_atomic_requests);
static PORT_COUNTER_ATTR(rx_dct_connect);
static PORT_COUNTER_ATTR(out_of_buffer);
static PORT_COUNTER_ATTR(out_of_sequence);
static PORT_COUNTER_ATTR(duplicate_request);
static PORT_COUNTER_ATTR(rnr_nak_retry_err);
static PORT_COUNTER_ATTR(packet_seq_err);
static PORT_COUNTER_ATTR(implied_nak_seq_err);
static PORT_COUNTER_ATTR(local_ack_timeout_err);
1905322810Shselasky
/* NULL-terminated list backing the "counters" sysfs group. */
static struct attribute *counter_attrs[] = {
	&port_counter_attr_rx_write_requests.attr.attr,
	&port_counter_attr_rx_read_requests.attr.attr,
	&port_counter_attr_rx_atomic_requests.attr.attr,
	&port_counter_attr_rx_dct_connect.attr.attr,
	&port_counter_attr_out_of_buffer.attr.attr,
	&port_counter_attr_out_of_sequence.attr.attr,
	&port_counter_attr_duplicate_request.attr.attr,
	&port_counter_attr_rnr_nak_retry_err.attr.attr,
	&port_counter_attr_packet_seq_err.attr.attr,
	&port_counter_attr_implied_nak_seq_err.attr.attr,
	&port_counter_attr_local_ack_timeout_err.attr.attr,
	NULL
};

/* "counters" subdirectory created under each port's kobject. */
static struct attribute_group port_counters_group = {
	.name  = "counters",
	.attrs  = counter_attrs
};

/* Read-only attributes: only .show is wired up. */
static const struct sysfs_ops port_sysfs_ops = {
	.show = port_attr_show
};

static struct kobj_type port_type = {
	.sysfs_ops     = &port_sysfs_ops,
};
1933322810Shselasky
/*
 * Create the sysfs kobject for one port (named by its 1-based number)
 * and, if the device exports the retransmission counters, attach the
 * "counters" attribute group.  Marks the group enabled on success so
 * destroy_ports_attrs() knows what to tear down.
 */
static int add_port_attrs(struct mlx5_ib_dev *dev,
			  struct kobject *parent,
			  struct mlx5_ib_port_sysfs_group *port,
			  u8 port_num)
{
	int ret;

	ret = kobject_init_and_add(&port->kobj, &port_type,
				   parent,
				   "%d", port_num);
	if (ret)
		return ret;

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
	    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		ret = sysfs_create_group(&port->kobj, &port_counters_group);
		if (ret)
			goto put_kobj;
	}

	port->enabled = true;
	return ret;

put_kobj:
	/* kobject_put (not kfree) — the kobject was initialized above. */
	kobject_put(&port->kobj);
	return ret;
}
1961322810Shselasky
/*
 * Remove the sysfs objects of the first num_ports ports (skipping any
 * whose creation never completed) and drop the shared "mlx5_ports"
 * parent kobject.  Safe to call after a partial create_port_attrs().
 */
static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
				unsigned int num_ports)
{
	unsigned int i;

	for (i = 0; i < num_ports; i++) {
		struct mlx5_ib_port_sysfs_group *port =
			&dev->port[i].group;

		if (!port->enabled)
			continue;

		/* Mirror the capability check used in add_port_attrs(). */
		if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
		    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
			sysfs_remove_group(&port->kobj,
					   &port_counters_group);
		kobject_put(&port->kobj);
		port->enabled = false;
	}

	if (dev->ports_parent) {
		kobject_put(dev->ports_parent);
		dev->ports_parent = NULL;
	}
}
1987322810Shselasky
1988322810Shselaskystatic int create_port_attrs(struct mlx5_ib_dev *dev)
1989322810Shselasky{
1990322810Shselasky	int ret = 0;
1991322810Shselasky	unsigned int i = 0;
1992322810Shselasky	struct device *device = &dev->ib_dev.dev;
1993322810Shselasky
1994322810Shselasky	dev->ports_parent = kobject_create_and_add("mlx5_ports",
1995322810Shselasky						   &device->kobj);
1996322810Shselasky	if (!dev->ports_parent)
1997322810Shselasky		return -ENOMEM;
1998322810Shselasky
1999322810Shselasky	for (i = 0; i < dev->num_ports; i++) {
2000322810Shselasky		ret = add_port_attrs(dev,
2001322810Shselasky				     dev->ports_parent,
2002322810Shselasky				     &dev->port[i].group,
2003322810Shselasky				     i + 1);
2004322810Shselasky
2005322810Shselasky		if (ret)
2006322810Shselasky			goto _destroy_ports_attrs;
2007322810Shselasky	}
2008322810Shselasky
2009322810Shselasky	return 0;
2010322810Shselasky
2011322810Shselasky_destroy_ports_attrs:
2012322810Shselasky	destroy_ports_attrs(dev, i);
2013322810Shselasky	return ret;
2014322810Shselasky}
2015322810Shselasky
2016322810Shselaskystatic void *mlx5_ib_add(struct mlx5_core_dev *mdev)
2017322810Shselasky{
2018322810Shselasky	struct mlx5_ib_dev *dev;
2019322810Shselasky	int err;
2020322810Shselasky	int i;
2021322810Shselasky
2022322810Shselasky	printk_once(KERN_INFO "%s", mlx5_version);
2023322810Shselasky
2024322810Shselasky	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2025322810Shselasky	if (!dev)
2026322810Shselasky		return NULL;
2027322810Shselasky
2028322810Shselasky	dev->mdev = mdev;
2029322810Shselasky
2030322810Shselasky	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2031322810Shselasky			     GFP_KERNEL);
2032322810Shselasky	if (!dev->port)
2033322810Shselasky		goto err_dealloc;
2034322810Shselasky
2035322810Shselasky	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2036322810Shselasky		dev->port[i].dev = dev;
2037322810Shselasky		dev->port[i].port_num = i;
2038322810Shselasky		dev->port[i].port_gone = 0;
2039322810Shselasky		memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2040322810Shselasky	}
2041322810Shselasky
2042322810Shselasky	err = get_port_caps(dev);
2043322810Shselasky	if (err)
2044322810Shselasky		goto err_free_port;
2045322810Shselasky
2046322810Shselasky	if (mlx5_use_mad_ifc(dev))
2047322810Shselasky		get_ext_port_caps(dev);
2048322810Shselasky
2049322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2050322810Shselasky	    IB_LINK_LAYER_ETHERNET) {
2051322810Shselasky		if (MLX5_CAP_GEN(mdev, roce)) {
2052322810Shselasky			err = mlx5_nic_vport_enable_roce(mdev);
2053322810Shselasky			if (err)
2054322810Shselasky				goto err_free_port;
2055322810Shselasky		} else {
2056322810Shselasky			goto err_free_port;
2057322810Shselasky		}
2058322810Shselasky	}
2059322810Shselasky
2060322810Shselasky	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
2061322810Shselasky
2062322810Shselasky	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2063322810Shselasky	dev->ib_dev.owner		= THIS_MODULE;
2064322810Shselasky	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
2065322810Shselasky	dev->ib_dev.local_dma_lkey	= mdev->special_contexts.resd_lkey;
2066322810Shselasky	dev->num_ports		= MLX5_CAP_GEN(mdev, num_ports);
2067322810Shselasky	dev->ib_dev.phys_port_cnt     = dev->num_ports;
2068322810Shselasky	dev->ib_dev.num_comp_vectors    =
2069322810Shselasky		dev->mdev->priv.eq_table.num_comp_vectors;
2070322810Shselasky	dev->ib_dev.dma_device	= &mdev->pdev->dev;
2071322810Shselasky
2072322810Shselasky	dev->ib_dev.uverbs_abi_ver	= MLX5_IB_UVERBS_ABI_VERSION;
2073322810Shselasky	dev->ib_dev.uverbs_cmd_mask	=
2074322810Shselasky		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
2075322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
2076322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
2077322810Shselasky		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
2078322810Shselasky		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
2079322810Shselasky		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
2080322810Shselasky		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
2081322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
2082322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
2083322810Shselasky		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
2084322810Shselasky		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
2085322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
2086322810Shselasky		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
2087322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
2088322810Shselasky		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
2089322810Shselasky		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
2090322810Shselasky		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
2091322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
2092322810Shselasky		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
2093322810Shselasky		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
2094322810Shselasky		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
2095322810Shselasky		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
2096322810Shselasky		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
2097322810Shselasky
2098322810Shselasky	dev->ib_dev.query_device	= mlx5_ib_query_device;
2099322810Shselasky	dev->ib_dev.query_port		= mlx5_ib_query_port;
2100322810Shselasky	dev->ib_dev.get_link_layer	= mlx5_ib_port_link_layer;
2101322810Shselasky	dev->ib_dev.get_netdev		= mlx5_ib_get_netdev;
2102322810Shselasky	dev->ib_dev.query_gid		= mlx5_ib_query_gid;
2103322810Shselasky	dev->ib_dev.query_pkey		= mlx5_ib_query_pkey;
2104322810Shselasky	dev->ib_dev.modify_device	= mlx5_ib_modify_device;
2105322810Shselasky	dev->ib_dev.modify_port		= mlx5_ib_modify_port;
2106322810Shselasky	dev->ib_dev.alloc_ucontext	= mlx5_ib_alloc_ucontext;
2107322810Shselasky	dev->ib_dev.dealloc_ucontext	= mlx5_ib_dealloc_ucontext;
2108322810Shselasky	dev->ib_dev.mmap		= mlx5_ib_mmap;
2109322810Shselasky	dev->ib_dev.alloc_pd		= mlx5_ib_alloc_pd;
2110322810Shselasky	dev->ib_dev.dealloc_pd		= mlx5_ib_dealloc_pd;
2111322810Shselasky	dev->ib_dev.create_ah		= mlx5_ib_create_ah;
2112322810Shselasky	dev->ib_dev.query_ah		= mlx5_ib_query_ah;
2113322810Shselasky	dev->ib_dev.destroy_ah		= mlx5_ib_destroy_ah;
2114322810Shselasky	dev->ib_dev.create_srq		= mlx5_ib_create_srq;
2115322810Shselasky	dev->ib_dev.modify_srq		= mlx5_ib_modify_srq;
2116322810Shselasky	dev->ib_dev.query_srq		= mlx5_ib_query_srq;
2117322810Shselasky	dev->ib_dev.destroy_srq		= mlx5_ib_destroy_srq;
2118322810Shselasky	dev->ib_dev.post_srq_recv	= mlx5_ib_post_srq_recv;
2119322810Shselasky	dev->ib_dev.create_qp		= mlx5_ib_create_qp;
2120322810Shselasky	dev->ib_dev.modify_qp		= mlx5_ib_modify_qp;
2121322810Shselasky	dev->ib_dev.query_qp		= mlx5_ib_query_qp;
2122322810Shselasky	dev->ib_dev.destroy_qp		= mlx5_ib_destroy_qp;
2123322810Shselasky	dev->ib_dev.post_send		= mlx5_ib_post_send;
2124322810Shselasky	dev->ib_dev.post_recv		= mlx5_ib_post_recv;
2125322810Shselasky	dev->ib_dev.create_cq		= mlx5_ib_create_cq;
2126322810Shselasky	dev->ib_dev.modify_cq		= mlx5_ib_modify_cq;
2127322810Shselasky	dev->ib_dev.resize_cq		= mlx5_ib_resize_cq;
2128322810Shselasky	dev->ib_dev.destroy_cq		= mlx5_ib_destroy_cq;
2129322810Shselasky	dev->ib_dev.poll_cq		= mlx5_ib_poll_cq;
2130322810Shselasky	dev->ib_dev.req_notify_cq	= mlx5_ib_arm_cq;
2131322810Shselasky	dev->ib_dev.get_dma_mr		= mlx5_ib_get_dma_mr;
2132322810Shselasky	dev->ib_dev.reg_user_mr		= mlx5_ib_reg_user_mr;
2133322810Shselasky	dev->ib_dev.reg_phys_mr		= mlx5_ib_reg_phys_mr;
2134322810Shselasky	dev->ib_dev.dereg_mr		= mlx5_ib_dereg_mr;
2135322810Shselasky	dev->ib_dev.attach_mcast	= mlx5_ib_mcg_attach;
2136322810Shselasky	dev->ib_dev.detach_mcast	= mlx5_ib_mcg_detach;
2137322810Shselasky	dev->ib_dev.process_mad		= mlx5_ib_process_mad;
2138322810Shselasky	dev->ib_dev.alloc_fast_reg_mr	= mlx5_ib_alloc_fast_reg_mr;
2139322810Shselasky	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2140322810Shselasky	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
2141322810Shselasky
2142322810Shselasky	if (MLX5_CAP_GEN(mdev, xrc)) {
2143322810Shselasky		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2144322810Shselasky		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2145322810Shselasky		dev->ib_dev.uverbs_cmd_mask |=
2146322810Shselasky			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2147322810Shselasky			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2148322810Shselasky	}
2149322810Shselasky
2150322810Shselasky	err = init_node_data(dev);
2151322810Shselasky	if (err)
2152322810Shselasky		goto err_disable_roce;
2153322810Shselasky
2154322810Shselasky	mutex_init(&dev->cap_mask_mutex);
2155322810Shselasky	INIT_LIST_HEAD(&dev->qp_list);
2156322810Shselasky	spin_lock_init(&dev->reset_flow_resource_lock);
2157322810Shselasky
2158322810Shselasky	err = create_dev_resources(&dev->devr);
2159322810Shselasky	if (err)
2160322810Shselasky		goto err_disable_roce;
2161322810Shselasky
2162322810Shselasky
2163322810Shselasky	err = mlx5_ib_alloc_q_counters(dev);
2164322810Shselasky	if (err)
2165322810Shselasky		goto err_odp;
2166322810Shselasky
2167322810Shselasky	err = ib_register_device(&dev->ib_dev, NULL);
2168322810Shselasky	if (err)
2169322810Shselasky		goto err_q_cnt;
2170322810Shselasky
2171322810Shselasky	err = create_umr_res(dev);
2172322810Shselasky	if (err)
2173322810Shselasky		goto err_dev;
2174322810Shselasky
2175322810Shselasky	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2176322810Shselasky	    MLX5_CAP_PORT_TYPE_IB) {
2177322810Shselasky		if (init_dc_improvements(dev))
2178322810Shselasky			mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
2179322810Shselasky	}
2180322810Shselasky
2181322810Shselasky	err = create_port_attrs(dev);
2182322810Shselasky	if (err)
2183322810Shselasky		goto err_dc;
2184322810Shselasky
2185322810Shselasky	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2186322810Shselasky		err = device_create_file(&dev->ib_dev.dev,
2187322810Shselasky					 mlx5_class_attributes[i]);
2188322810Shselasky		if (err)
2189322810Shselasky			goto err_port_attrs;
2190322810Shselasky	}
2191322810Shselasky
2192322810Shselasky	if (1) {
2193322810Shselasky		struct thread *rl_thread = NULL;
2194322810Shselasky		struct proc *rl_proc = NULL;
2195322810Shselasky
2196322810Shselasky		for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2197322810Shselasky			(void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2198322810Shselasky			    RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2199322810Shselasky		}
2200322810Shselasky	}
2201322810Shselasky
2202322810Shselasky	dev->ib_active = true;
2203322810Shselasky
2204322810Shselasky	return dev;
2205322810Shselasky
2206322810Shselaskyerr_port_attrs:
2207322810Shselasky	destroy_ports_attrs(dev, dev->num_ports);
2208322810Shselasky
2209322810Shselaskyerr_dc:
2210322810Shselasky	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2211322810Shselasky	    MLX5_CAP_PORT_TYPE_IB)
2212322810Shselasky		cleanup_dc_improvements(dev);
2213322810Shselasky	destroy_umrc_res(dev);
2214322810Shselasky
2215322810Shselaskyerr_dev:
2216322810Shselasky	ib_unregister_device(&dev->ib_dev);
2217322810Shselasky
2218322810Shselaskyerr_q_cnt:
2219322810Shselasky	mlx5_ib_dealloc_q_counters(dev);
2220322810Shselasky
2221322810Shselaskyerr_odp:
2222322810Shselasky	destroy_dev_resources(&dev->devr);
2223322810Shselasky
2224322810Shselaskyerr_disable_roce:
2225322810Shselasky	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2226322810Shselasky	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2227322810Shselasky		mlx5_nic_vport_disable_roce(mdev);
2228322810Shselaskyerr_free_port:
2229322810Shselasky	kfree(dev->port);
2230322810Shselasky
2231322810Shselaskyerr_dealloc:
2232322810Shselasky	ib_dealloc_device((struct ib_device *)dev);
2233322810Shselasky
2234322810Shselasky	return NULL;
2235322810Shselasky}
2236322810Shselasky
/*
 * Detach callback invoked by mlx5_core when the underlying device goes
 * away.  Tears down everything mlx5_ib_add() set up, in roughly the
 * reverse order of creation.  Must not return until all per-port
 * kthreads have exited, since they dereference dev->port[].
 */
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	int i;

	/*
	 * Signal each RoCE port-update kthread to stop (port_gone = 1)
	 * and wait until it acknowledges; presumably the kthread sets
	 * port_gone to 2 just before exiting — confirm against
	 * mlx5_ib_roce_port_update().  Polls once per second.
	 */
	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].port_gone = 1;
		while (dev->port[i].port_gone != 2)
			pause("W", hz);
	}

	/* Remove the sysfs/class attribute files created at add time. */
	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		device_remove_file(&dev->ib_dev.dev,
		    mlx5_class_attributes[i]);
	}

	destroy_ports_attrs(dev, dev->num_ports);
	/* DC improvements were only initialized for IB-type ports. */
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	mlx5_ib_dealloc_q_counters(dev);
	/* Unregister from the IB core before destroying UMR/devr state. */
	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	destroy_dev_resources(&dev->devr);

	/* Undo the RoCE vport enable done in mlx5_ib_add() for Ethernet. */
	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

	kfree(dev->port);
	ib_dealloc_device(&dev->ib_dev);
}
2269322810Shselasky
2270322810Shselaskystatic struct mlx5_interface mlx5_ib_interface = {
2271322810Shselasky	.add            = mlx5_ib_add,
2272322810Shselasky	.remove         = mlx5_ib_remove,
2273322810Shselasky	.event          = mlx5_ib_event,
2274322810Shselasky	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
2275322810Shselasky};
2276322810Shselasky
2277322810Shselaskystatic int __init mlx5_ib_init(void)
2278322810Shselasky{
2279322810Shselasky	int err;
2280322810Shselasky
2281322810Shselasky	if (deprecated_prof_sel != 2)
2282322810Shselasky		printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2283322810Shselasky
2284322810Shselasky	err = mlx5_register_interface(&mlx5_ib_interface);
2285322810Shselasky	if (err)
2286322810Shselasky		goto clean_odp;
2287322810Shselasky
2288322810Shselasky	mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
2289322810Shselasky	if (!mlx5_ib_wq) {
2290322810Shselasky		printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
2291322810Shselasky		goto err_unreg;
2292322810Shselasky	}
2293322810Shselasky
2294322810Shselasky	return err;
2295322810Shselasky
2296322810Shselaskyerr_unreg:
2297322810Shselasky	mlx5_unregister_interface(&mlx5_ib_interface);
2298322810Shselasky
2299322810Shselaskyclean_odp:
2300322810Shselasky	return err;
2301322810Shselasky}
2302322810Shselasky
/*
 * Module unload hook: reverse of mlx5_ib_init().  Destroy (and thereby
 * drain) the global work queue first so no queued work can run after
 * the interface is unregistered.
 */
static void __exit mlx5_ib_cleanup(void)
{
	destroy_workqueue(mlx5_ib_wq);
	mlx5_unregister_interface(&mlx5_ib_interface);
}
2308322810Shselasky
/*
 * Hook module load/unload at SI_ORDER_THIRD — presumably so that
 * mlx5_core, which this driver registers with, is initialized at an
 * earlier order; confirm against mlx5_core's module ordering.
 */
module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);
2311