mlx5_ib_main.c revision 325603
1/*-
2 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: stable/11/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c 325603 2017-11-09 17:54:00Z hselasky $
26 */
27
28#include <linux/errno.h>
29#include <linux/pci.h>
30#include <linux/dma-mapping.h>
31#include <linux/slab.h>
32#include <linux/io-mapping.h>
33#include <linux/sched.h>
34#include <linux/netdevice.h>
35#include <linux/etherdevice.h>
36#include <linux/list.h>
37#include <dev/mlx5/driver.h>
38#include <dev/mlx5/vport.h>
39#include <asm/pgtable.h>
40#include <linux/fs.h>
41#undef inode
42
43#include <rdma/ib_user_verbs.h>
44#include <rdma/ib_smi.h>
45#include <rdma/ib_umem.h>
46#include "user.h"
47#include "mlx5_ib.h"
48
49#include <sys/unistd.h>
50#include <sys/kthread.h>
51
52#define DRIVER_NAME "mlx5_ib"
53#define DRIVER_VERSION "3.2-rc1"
54#define DRIVER_RELDATE	"May 2016"
55
56MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
57MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
58MODULE_LICENSE("Dual BSD/GPL");
59MODULE_DEPEND(mlx5ib, linuxkpi, 1, 1, 1);
60MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
61MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
62MODULE_VERSION(mlx5ib, 1);
63
64static int deprecated_prof_sel = 2;
65module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
66MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
67
68enum {
69	MLX5_STANDARD_ATOMIC_SIZE = 0x8,
70};
71
72struct workqueue_struct *mlx5_ib_wq;
73
74static char mlx5_version[] =
75	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
76	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
77
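/*
 * Derive the IB atomic capabilities from the HCA atomic caps: report
 * IB_ATOMIC_HCA only when 8-byte compare-swap and fetch-add (or their
 * masked variants) are supported and the 8-byte requestor endianness
 * mode is available (or the host is big-endian); otherwise report
 * IB_ATOMIC_NONE.
 */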
78static void get_atomic_caps(struct mlx5_ib_dev *dev,
79			    struct ib_device_attr *props)
80{
81	int tmp;
82	u8 atomic_operations;
83	u8 atomic_size_qp;
84	u8 atomic_req_endianess;
85
86	atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
87	atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
88	atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
89					       atomic_req_8B_endianess_mode) ||
90			       !mlx5_host_is_le();
91
92	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
93	if (((atomic_operations & tmp) == tmp)
94	    && (atomic_size_qp & 8)) {
95		if (atomic_req_endianess) {
96			props->atomic_cap = IB_ATOMIC_HCA;
97		} else {
98			props->atomic_cap = IB_ATOMIC_NONE;
99		}
100	} else {
101		props->atomic_cap = IB_ATOMIC_NONE;
102	}
103
104	tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
105	if (((atomic_operations & tmp) == tmp)
106	    && (atomic_size_qp & 8)) {
107		if (atomic_req_endianess) {
108			props->masked_atomic_cap = IB_ATOMIC_HCA;
109		} else {
110			props->masked_atomic_cap = IB_ATOMIC_NONE;
111		}
112	} else {
113		props->masked_atomic_cap = IB_ATOMIC_NONE;
114	}
115}
116
117static enum rdma_link_layer
118mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
119{
120	struct mlx5_ib_dev *dev = to_mdev(device);
121
122	switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
123	case MLX5_CAP_PORT_TYPE_IB:
124		return IB_LINK_LAYER_INFINIBAND;
125	case MLX5_CAP_PORT_TYPE_ETH:
126		return IB_LINK_LAYER_ETHERNET;
127	default:
128		return IB_LINK_LAYER_UNSPECIFIED;
129	}
130}
131
132static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
133{
134	return !dev->mdev->issi;
135}
136
137enum {
138	MLX5_VPORT_ACCESS_METHOD_MAD,
139	MLX5_VPORT_ACCESS_METHOD_HCA,
140	MLX5_VPORT_ACCESS_METHOD_NIC,
141};
142
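/*
 * Select how vport attributes are queried: through MADs while the
 * device still operates at ISSI 0, through the NIC vport commands when
 * the link layer is Ethernet, and through the HCA vport commands
 * otherwise.
 */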
143static int mlx5_get_vport_access_method(struct ib_device *ibdev)
144{
145	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
146		return MLX5_VPORT_ACCESS_METHOD_MAD;
147
148	if (mlx5_ib_port_link_layer(ibdev, 1) ==
149	    IB_LINK_LAYER_ETHERNET)
150		return MLX5_VPORT_ACCESS_METHOD_NIC;
151
152	return MLX5_VPORT_ACCESS_METHOD_HCA;
153}
154
155static int mlx5_query_system_image_guid(struct ib_device *ibdev,
156					__be64 *sys_image_guid)
157{
158	struct mlx5_ib_dev *dev = to_mdev(ibdev);
159	struct mlx5_core_dev *mdev = dev->mdev;
160	u64 tmp;
161	int err;
162
163	switch (mlx5_get_vport_access_method(ibdev)) {
164	case MLX5_VPORT_ACCESS_METHOD_MAD:
165		return mlx5_query_system_image_guid_mad_ifc(ibdev,
166							    sys_image_guid);
167
168	case MLX5_VPORT_ACCESS_METHOD_HCA:
169		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
170		if (!err)
171			*sys_image_guid = cpu_to_be64(tmp);
172		return err;
173
174	case MLX5_VPORT_ACCESS_METHOD_NIC:
175		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
176		if (!err)
177			*sys_image_guid = cpu_to_be64(tmp);
178		return err;
179
180	default:
181		return -EINVAL;
182	}
183}
184
185static int mlx5_query_max_pkeys(struct ib_device *ibdev,
186				u16 *max_pkeys)
187{
188	struct mlx5_ib_dev *dev = to_mdev(ibdev);
189	struct mlx5_core_dev *mdev = dev->mdev;
190
191	switch (mlx5_get_vport_access_method(ibdev)) {
192	case MLX5_VPORT_ACCESS_METHOD_MAD:
193		return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
194
195	case MLX5_VPORT_ACCESS_METHOD_HCA:
196	case MLX5_VPORT_ACCESS_METHOD_NIC:
197		*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
198						pkey_table_size));
199		return 0;
200
201	default:
202		return -EINVAL;
203	}
204}
205
206static int mlx5_query_vendor_id(struct ib_device *ibdev,
207				u32 *vendor_id)
208{
209	struct mlx5_ib_dev *dev = to_mdev(ibdev);
210
211	switch (mlx5_get_vport_access_method(ibdev)) {
212	case MLX5_VPORT_ACCESS_METHOD_MAD:
213		return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
214
215	case MLX5_VPORT_ACCESS_METHOD_HCA:
216	case MLX5_VPORT_ACCESS_METHOD_NIC:
217		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
218
219	default:
220		return -EINVAL;
221	}
222}
223
224static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
225				__be64 *node_guid)
226{
227	u64 tmp;
228	int err;
229
230	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
231	case MLX5_VPORT_ACCESS_METHOD_MAD:
232		return mlx5_query_node_guid_mad_ifc(dev, node_guid);
233
234	case MLX5_VPORT_ACCESS_METHOD_HCA:
235		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
236		if (!err)
237			*node_guid = cpu_to_be64(tmp);
238		return err;
239
240	case MLX5_VPORT_ACCESS_METHOD_NIC:
241		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
242		if (!err)
243			*node_guid = cpu_to_be64(tmp);
244		return err;
245
246	default:
247		return -EINVAL;
248	}
249}
250
251struct mlx5_reg_node_desc {
252	u8	desc[64];
253};
254
255static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
256{
257	struct mlx5_reg_node_desc in;
258
259	if (mlx5_use_mad_ifc(dev))
260		return mlx5_query_node_desc_mad_ifc(dev, node_desc);
261
262	memset(&in, 0, sizeof(in));
263
264	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
265				    sizeof(struct mlx5_reg_node_desc),
266				    MLX5_REG_NODE_DESC, 0, 0);
267}
268
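/*
 * Fill in ib_device_attr from the firmware capability pages: GUIDs,
 * device capability flags and the resource limits (QPs, CQs, MRs,
 * SRQs, SGEs, multicast and atomic capabilities).
 */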
269static int mlx5_ib_query_device(struct ib_device *ibdev,
270				struct ib_device_attr *props)
271{
272	struct mlx5_ib_dev *dev = to_mdev(ibdev);
273	struct mlx5_core_dev *mdev = dev->mdev;
274	int max_sq_desc;
275	int max_rq_sg;
276	int max_sq_sg;
277	int err;
278
279
280	memset(props, 0, sizeof(*props));
281
282	err = mlx5_query_system_image_guid(ibdev,
283					   &props->sys_image_guid);
284	if (err)
285		return err;
286
287	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
288	if (err)
289		return err;
290
291	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
292	if (err)
293		return err;
294
295	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
296		((u64)fw_rev_min(dev->mdev) << 16) |
297		fw_rev_sub(dev->mdev);
298	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
299		IB_DEVICE_PORT_ACTIVE_EVENT		|
300		IB_DEVICE_SYS_IMAGE_GUID		|
301		IB_DEVICE_RC_RNR_NAK_GEN;
302
303	if (MLX5_CAP_GEN(mdev, pkv))
304		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
305	if (MLX5_CAP_GEN(mdev, qkv))
306		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
307	if (MLX5_CAP_GEN(mdev, apm))
308		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
309	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
310	if (MLX5_CAP_GEN(mdev, xrc))
311		props->device_cap_flags |= IB_DEVICE_XRC;
312	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
313	if (MLX5_CAP_GEN(mdev, block_lb_mc))
314		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
315
316	props->vendor_part_id	   = mdev->pdev->device;
317	props->hw_ver		   = mdev->pdev->revision;
318
319	props->max_mr_size	   = ~0ull;
320	props->page_size_cap	   = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) - 1);
321	props->max_qp		   = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
322	props->max_qp_wr	   = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
323	max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
324		     sizeof(struct mlx5_wqe_data_seg);
325	max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
326	max_sq_sg = (max_sq_desc -
327		     sizeof(struct mlx5_wqe_ctrl_seg) -
328		     sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
329	props->max_sge = min(max_rq_sg, max_sq_sg);
330	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
331	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
332	props->max_mr		   = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
333	props->max_pd		   = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
334	props->max_qp_rd_atom	   = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
335	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
336	props->max_srq		   = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
337	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
338	props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
339	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
340	props->max_srq_sge	   = max_rq_sg - 1;
341	props->max_fast_reg_page_list_len = (unsigned int)-1;
342	get_atomic_caps(dev, props);
343	props->max_mcast_grp	   = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
344	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
345	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
346					   props->max_mcast_grp;
347	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
348	props->max_ah		= INT_MAX;
349
350	return 0;
351}
352
353enum mlx5_ib_width {
354	MLX5_IB_WIDTH_1X	= 1 << 0,
355	MLX5_IB_WIDTH_2X	= 1 << 1,
356	MLX5_IB_WIDTH_4X	= 1 << 2,
357	MLX5_IB_WIDTH_8X	= 1 << 3,
358	MLX5_IB_WIDTH_12X	= 1 << 4
359};
360
361static int translate_active_width(struct ib_device *ibdev, u8 active_width,
362				  u8 *ib_width)
363{
364	struct mlx5_ib_dev *dev = to_mdev(ibdev);
365	int err = 0;
366
367	if (active_width & MLX5_IB_WIDTH_1X) {
368		*ib_width = IB_WIDTH_1X;
369	} else if (active_width & MLX5_IB_WIDTH_2X) {
370		mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
371			     (int)active_width);
372		err = -EINVAL;
373	} else if (active_width & MLX5_IB_WIDTH_4X) {
374		*ib_width = IB_WIDTH_4X;
375	} else if (active_width & MLX5_IB_WIDTH_8X) {
376		*ib_width = IB_WIDTH_8X;
377	} else if (active_width & MLX5_IB_WIDTH_12X) {
378		*ib_width = IB_WIDTH_12X;
379	} else {
380		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
381			    (int)active_width);
382		err = -EINVAL;
383	}
384
385	return err;
386}
387
388/*
389 * TODO: Move to IB core
390 */
391enum ib_max_vl_num {
392	__IB_MAX_VL_0		= 1,
393	__IB_MAX_VL_0_1		= 2,
394	__IB_MAX_VL_0_3		= 3,
395	__IB_MAX_VL_0_7		= 4,
396	__IB_MAX_VL_0_14	= 5,
397};
398
399enum mlx5_vl_hw_cap {
400	MLX5_VL_HW_0	= 1,
401	MLX5_VL_HW_0_1	= 2,
402	MLX5_VL_HW_0_2	= 3,
403	MLX5_VL_HW_0_3	= 4,
404	MLX5_VL_HW_0_4	= 5,
405	MLX5_VL_HW_0_5	= 6,
406	MLX5_VL_HW_0_6	= 7,
407	MLX5_VL_HW_0_7	= 8,
408	MLX5_VL_HW_0_14	= 15
409};
410
411static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
412				u8 *max_vl_num)
413{
414	switch (vl_hw_cap) {
415	case MLX5_VL_HW_0:
416		*max_vl_num = __IB_MAX_VL_0;
417		break;
418	case MLX5_VL_HW_0_1:
419		*max_vl_num = __IB_MAX_VL_0_1;
420		break;
421	case MLX5_VL_HW_0_3:
422		*max_vl_num = __IB_MAX_VL_0_3;
423		break;
424	case MLX5_VL_HW_0_7:
425		*max_vl_num = __IB_MAX_VL_0_7;
426		break;
427	case MLX5_VL_HW_0_14:
428		*max_vl_num = __IB_MAX_VL_0_14;
429		break;
430
431	default:
432		return -EINVAL;
433	}
434
435	return 0;
436}
437
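/*
 * Query IB port attributes by combining the HCA vport context with the
 * PTYS (link width and speed), PMTU and PVLC (VL capability) access
 * registers.
 */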
438static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
439			      struct ib_port_attr *props)
440{
441	struct mlx5_ib_dev *dev = to_mdev(ibdev);
442	struct mlx5_core_dev *mdev = dev->mdev;
443	u32 *rep;
444	int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
445	struct mlx5_ptys_reg *ptys;
446	struct mlx5_pmtu_reg *pmtu;
447	struct mlx5_pvlc_reg pvlc;
448	void *ctx;
449	int err;
450
451	rep = mlx5_vzalloc(outlen);
452	ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
453	pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
454	if (!rep || !ptys || !pmtu) {
455		err = -ENOMEM;
456		goto out;
457	}
458
459	memset(props, 0, sizeof(*props));
460
461	/* XXX: what if this is a PF with dual ports? */
462	err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
463	if (err)
464		goto out;
465
466	ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
467
468	props->lid		= MLX5_GET(hca_vport_context, ctx, lid);
469	props->lmc		= MLX5_GET(hca_vport_context, ctx, lmc);
470	props->sm_lid		= MLX5_GET(hca_vport_context, ctx, sm_lid);
471	props->sm_sl		= MLX5_GET(hca_vport_context, ctx, sm_sl);
472	props->state		= MLX5_GET(hca_vport_context, ctx, vport_state);
473	props->phys_state	= MLX5_GET(hca_vport_context, ctx,
474					port_physical_state);
475	props->port_cap_flags	= MLX5_GET(hca_vport_context, ctx, cap_mask1);
476	props->gid_tbl_len	= mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
477	props->max_msg_sz	= 1 << MLX5_CAP_GEN(mdev, log_max_msg);
478	props->pkey_tbl_len	= mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
479	props->bad_pkey_cntr	= MLX5_GET(hca_vport_context, ctx,
480					      pkey_violation_counter);
481	props->qkey_viol_cntr	= MLX5_GET(hca_vport_context, ctx,
482					      qkey_violation_counter);
483	props->subnet_timeout	= MLX5_GET(hca_vport_context, ctx,
484					      subnet_timeout);
485	props->init_type_reply	= MLX5_GET(hca_vport_context, ctx,
486					   init_type_reply);
487
488	ptys->proto_mask |= MLX5_PTYS_IB;
489	ptys->local_port = port;
490	err = mlx5_core_access_ptys(mdev, ptys, 0);
491	if (err)
492		goto out;
493
494	err = translate_active_width(ibdev, ptys->ib_link_width_oper,
495				     &props->active_width);
496	if (err)
497		goto out;
498
499	props->active_speed	= (u8)ptys->ib_proto_oper;
500
501	pmtu->local_port = port;
502	err = mlx5_core_access_pmtu(mdev, pmtu, 0);
503	if (err)
504		goto out;
505
506	props->max_mtu		= pmtu->max_mtu;
507	props->active_mtu	= pmtu->oper_mtu;
508
509	memset(&pvlc, 0, sizeof(pvlc));
510	pvlc.local_port = port;
511	err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
512	if (err)
513		goto out;
514
515	err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
516				   &props->max_vl_num);
517out:
518	kvfree(rep);
519	kfree(ptys);
520	kfree(pmtu);
521	return err;
522}
523
524int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
525		       struct ib_port_attr *props)
526{
527	switch (mlx5_get_vport_access_method(ibdev)) {
528	case MLX5_VPORT_ACCESS_METHOD_MAD:
529		return mlx5_query_port_mad_ifc(ibdev, port, props);
530
531	case MLX5_VPORT_ACCESS_METHOD_HCA:
532		return mlx5_query_port_ib(ibdev, port, props);
533
534	case MLX5_VPORT_ACCESS_METHOD_NIC:
535		return mlx5_query_port_roce(ibdev, port, props);
536
537	default:
538		return -EINVAL;
539	}
540}
541
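/*
 * Build the modified EUI-64 interface identifier used for the default
 * RoCE GID from the interface's 6-byte MAC address: insert FF:FE in
 * the middle and flip the universal/local bit.
 */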
542static inline int
543mlx5_addrconf_ifid_eui48(u8 *eui, struct net_device *dev)
544{
545	if (dev->if_addrlen != ETH_ALEN)
546		return -1;
547	memcpy(eui, IF_LLADDR(dev), 3);
548	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
549
550	/* NOTE: The scope ID is added by the GID to IP conversion */
551
552	eui[3] = 0xFF;
553	eui[4] = 0xFE;
554	eui[0] ^= 2;
555	return 0;
556}
557
558static void
559mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
560{
561	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
562	mlx5_addrconf_ifid_eui48(&gid->raw[8], dev);
563}
564
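/*
 * Kernel thread servicing one RoCE port: it periodically rebuilds the
 * GID table from the default (EUI-64 based) GID plus the IPv4/IPv6
 * addresses of the underlying network interface and its VLANs, pushes
 * any changes to the hardware through modify_gid_roce() and dispatches
 * IB_EVENT_GID_CHANGE so ibcore picks up the new table. The thread
 * exits once port_gone is set by the teardown path.
 */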
565static void
566mlx5_ib_roce_port_update(void *arg)
567{
568	struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
569	struct mlx5_ib_dev *dev = port->dev;
570	struct mlx5_core_dev *mdev = dev->mdev;
571	struct net_device *xdev[MLX5_IB_GID_MAX];
572	struct net_device *idev;
573	struct net_device *ndev;
574	struct ifaddr *ifa;
575	union ib_gid gid_temp;
576
577	while (port->port_gone == 0) {
578		int update = 0;
579		int gid_index = 0;
580		int j;
581		int error;
582
583		ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
584		if (ndev == NULL) {
585			pause("W", hz);
586			continue;
587		}
588
589		CURVNET_SET_QUIET(ndev->if_vnet);
590
591		memset(&gid_temp, 0, sizeof(gid_temp));
592		mlx5_make_default_gid(ndev, &gid_temp);
593		if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
594			port->gid_table[gid_index] = gid_temp;
595			update = 1;
596		}
597		xdev[gid_index] = ndev;
598		gid_index++;
599
600		IFNET_RLOCK();
601		TAILQ_FOREACH(idev, &V_ifnet, if_link) {
602			if (idev == ndev)
603				break;
604		}
605		if (idev != NULL) {
606		    TAILQ_FOREACH(idev, &V_ifnet, if_link) {
607			if (idev != ndev) {
608				if (idev->if_type != IFT_L2VLAN)
609					continue;
610				if (ndev != rdma_vlan_dev_real_dev(idev))
611					continue;
612			}
613			/* clone address information for IPv4 and IPv6 */
614			IF_ADDR_RLOCK(idev);
615			TAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
616				if (ifa->ifa_addr == NULL ||
617				    (ifa->ifa_addr->sa_family != AF_INET &&
618				     ifa->ifa_addr->sa_family != AF_INET6) ||
619				    gid_index >= MLX5_IB_GID_MAX)
620					continue;
621				memset(&gid_temp, 0, sizeof(gid_temp));
622				rdma_ip2gid(ifa->ifa_addr, &gid_temp);
623				/* check for existing entry */
624				for (j = 0; j != gid_index; j++) {
625					if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
626						break;
627				}
628				/* check if new entry must be added */
629				if (j == gid_index) {
630					if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
631						port->gid_table[gid_index] = gid_temp;
632						update = 1;
633					}
634					xdev[gid_index] = idev;
635					gid_index++;
636				}
637			}
638			IF_ADDR_RUNLOCK(idev);
639		    }
640		}
641		IFNET_RUNLOCK();
642		CURVNET_RESTORE();
643
644		if (update != 0 &&
645		    mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
646			struct ib_event event = {
647			    .device = &dev->ib_dev,
648			    .element.port_num = port->port_num + 1,
649			    .event = IB_EVENT_GID_CHANGE,
650			};
651
652			/* add new entries, if any */
653			for (j = 0; j != gid_index; j++) {
654				error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
655				    port->gid_table + j, xdev[j]);
656				if (error != 0)
657					printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
658			}
659			memset(&gid_temp, 0, sizeof(gid_temp));
660
661			/* clear old entries, if any */
662			for (; j != MLX5_IB_GID_MAX; j++) {
663				if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
664					continue;
665				port->gid_table[j] = gid_temp;
666				(void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
667				    port->gid_table + j, ndev);
668			}
669
670			/* make sure ibcore gets updated */
671			ib_dispatch_event(&event);
672		}
673		pause("W", hz);
674	}
675	do {
676		struct ib_event event = {
677			.device = &dev->ib_dev,
678			.element.port_num = port->port_num + 1,
679			.event = IB_EVENT_GID_CHANGE,
680		};
681		/* make sure ibcore gets updated */
682		ib_dispatch_event(&event);
683
684		/* wait a bit */
685		pause("W", hz);
686	} while (0);
687	port->port_gone = 2;
688	kthread_exit();
689}
690
691static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
692			     union ib_gid *gid)
693{
694	struct mlx5_ib_dev *dev = to_mdev(ibdev);
695	struct mlx5_core_dev *mdev = dev->mdev;
696
697	switch (mlx5_get_vport_access_method(ibdev)) {
698	case MLX5_VPORT_ACCESS_METHOD_MAD:
699		return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
700
701	case MLX5_VPORT_ACCESS_METHOD_HCA:
702		return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
703
704	case MLX5_VPORT_ACCESS_METHOD_NIC:
705		if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
706		    index < 0 || index >= MLX5_IB_GID_MAX ||
707		    dev->port[port - 1].port_gone != 0)
708			memset(gid, 0, sizeof(*gid));
709		else
710			*gid = dev->port[port - 1].gid_table[index];
711		return 0;
712
713	default:
714		return -EINVAL;
715	}
716}
717
718static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
719			      u16 *pkey)
720{
721	struct mlx5_ib_dev *dev = to_mdev(ibdev);
722	struct mlx5_core_dev *mdev = dev->mdev;
723
724	switch (mlx5_get_vport_access_method(ibdev)) {
725	case MLX5_VPORT_ACCESS_METHOD_MAD:
726		return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
727
728	case MLX5_VPORT_ACCESS_METHOD_HCA:
729	case MLX5_VPORT_ACCESS_METHOD_NIC:
730		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
731						 pkey);
732
733	default:
734		return -EINVAL;
735	}
736}
737
738static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
739				 struct ib_device_modify *props)
740{
741	struct mlx5_ib_dev *dev = to_mdev(ibdev);
742	struct mlx5_reg_node_desc in;
743	struct mlx5_reg_node_desc out;
744	int err;
745
746	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
747		return -EOPNOTSUPP;
748
749	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
750		return 0;
751
752	/*
753	 * If possible, pass the node description to firmware so it can generate
754	 * a Node Description Changed trap (144). If the command fails, just ignore it.
755	 */
756	memcpy(&in, props->node_desc, 64);
757	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
758				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
759	if (err)
760		return err;
761
762	memcpy(ibdev->node_desc, props->node_desc, 64);
763
764	return err;
765}
766
767static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
768			       struct ib_port_modify *props)
769{
770	u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
771		     IB_LINK_LAYER_ETHERNET);
772	struct mlx5_ib_dev *dev = to_mdev(ibdev);
773	struct ib_port_attr attr;
774	u32 tmp;
775	int err;
776
777	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
778	 * of whether port link layer is ETH or IB. For ETH ports, qkey
779	 * violations and port capabilities are not valid.
780	 */
781	if (is_eth)
782		return 0;
783
784	mutex_lock(&dev->cap_mask_mutex);
785
786	err = mlx5_ib_query_port(ibdev, port, &attr);
787	if (err)
788		goto out;
789
790	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
791		~props->clr_port_cap_mask;
792
793	err = mlx5_set_port_caps(dev->mdev, port, tmp);
794
795out:
796	mutex_unlock(&dev->cap_mask_mutex);
797	return err;
798}
799
800enum mlx5_cap_flags {
801	MLX5_CAP_COMPACT_AV = 1 << 0,
802};
803
804static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
805{
806	*flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
807		  MLX5_CAP_COMPACT_AV : 0;
808}
809
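/*
 * Allocate a verbs user context: validate the v0/v2 request, allocate
 * the requested UAR pages, build the UUAR bitmap with the fast path
 * UUARs reserved, and return the device limits to user space in the
 * response. For an Ethernet link layer a transport domain is allocated
 * as well.
 */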
810static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
811						  struct ib_udata *udata)
812{
813	struct mlx5_ib_dev *dev = to_mdev(ibdev);
814	struct mlx5_ib_alloc_ucontext_req_v2 req;
815	struct mlx5_ib_alloc_ucontext_resp resp;
816	struct mlx5_ib_ucontext *context;
817	struct mlx5_uuar_info *uuari;
818	struct mlx5_uar *uars;
819	int gross_uuars;
820	int num_uars;
821	int ver;
822	int uuarn;
823	int err;
824	int i;
825	size_t reqlen;
826
827	if (!dev->ib_active)
828		return ERR_PTR(-EAGAIN);
829
830	memset(&req, 0, sizeof(req));
831	memset(&resp, 0, sizeof(resp));
832
833	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
834	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
835		ver = 0;
836	else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
837		ver = 2;
838	else {
839		mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
840		return ERR_PTR(-EINVAL);
841	}
842
843	err = ib_copy_from_udata(&req, udata, reqlen);
844	if (err) {
845		mlx5_ib_err(dev, "copy failed\n");
846		return ERR_PTR(err);
847	}
848
849	if (req.reserved) {
850		mlx5_ib_err(dev, "request corrupted\n");
851		return ERR_PTR(-EINVAL);
852	}
853
854	if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
855		mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
856		return ERR_PTR(-ENOMEM);
857	}
858
859	req.total_num_uuars = ALIGN(req.total_num_uuars,
860				    MLX5_NON_FP_BF_REGS_PER_PAGE);
861	if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
862		mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
863			     req.num_low_latency_uuars, req.total_num_uuars - 1);
864		return ERR_PTR(-EINVAL);
865	}
866
867	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
868	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
869	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
870	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
871		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
872	resp.cache_line_size = L1_CACHE_BYTES;
873	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
874	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
875	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
876	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
877	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
878	set_mlx5_flags(&resp.flags, dev->mdev);
879
880	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
881		resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
882
883	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
884		resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
885
886	context = kzalloc(sizeof(*context), GFP_KERNEL);
887	if (!context)
888		return ERR_PTR(-ENOMEM);
889
890	uuari = &context->uuari;
891	mutex_init(&uuari->lock);
892	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
893	if (!uars) {
894		err = -ENOMEM;
895		goto out_ctx;
896	}
897
898	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
899				sizeof(*uuari->bitmap),
900				GFP_KERNEL);
901	if (!uuari->bitmap) {
902		err = -ENOMEM;
903		goto out_uar_ctx;
904	}
905	/* mark all fast path uuars as taken in the bitmap so they are
906	 * not handed out as regular uuars
907	 */
908	for (i = 0; i < gross_uuars; i++) {
909		uuarn = i & 3;
910		if (uuarn == 2 || uuarn == 3)
911			set_bit(i, uuari->bitmap);
912	}
913
914	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
915	if (!uuari->count) {
916		err = -ENOMEM;
917		goto out_bitmap;
918	}
919
920	for (i = 0; i < num_uars; i++) {
921		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
922		if (err) {
923			mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
924			goto out_uars;
925		}
926	}
927	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
928		context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
929
930	INIT_LIST_HEAD(&context->db_page_list);
931	mutex_init(&context->db_page_mutex);
932
933	resp.tot_uuars = req.total_num_uuars;
934	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
935	err = ib_copy_to_udata(udata, &resp,
936			       min_t(size_t, udata->outlen, sizeof(resp)));
937	if (err)
938		goto out_uars;
939
940	uuari->ver = ver;
941	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
942	uuari->uars = uars;
943	uuari->num_uars = num_uars;
944
945	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
946	    IB_LINK_LAYER_ETHERNET) {
947		err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
948		if (err)
949			goto out_uars;
950	}
951
952	return &context->ibucontext;
953
954out_uars:
955	for (i--; i >= 0; i--)
956		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
957	kfree(uuari->count);
958
959out_bitmap:
960	kfree(uuari->bitmap);
961
962out_uar_ctx:
963	kfree(uars);
964
965out_ctx:
966	kfree(context);
967	return ERR_PTR(err);
968}
969
970static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
971{
972	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
973	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
974	struct mlx5_uuar_info *uuari = &context->uuari;
975	int i;
976
977	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
978	    IB_LINK_LAYER_ETHERNET)
979		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
980
981	for (i = 0; i < uuari->num_uars; i++) {
982		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
983			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
984	}
985	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
986		if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
987			mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
988	}
989
990	kfree(uuari->count);
991	kfree(uuari->bitmap);
992	kfree(uuari->uars);
993	kfree(context);
994
995	return 0;
996}
997
998static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
999{
1000	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
1001}
1002
1003static int get_command(unsigned long offset)
1004{
1005	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1006}
1007
1008static int get_arg(unsigned long offset)
1009{
1010	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1011}
1012
1013static int get_index(unsigned long offset)
1014{
1015	return get_arg(offset);
1016}
1017
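/*
 * Map a single UAR page, selected by the mmap offset, into the
 * caller's address space with the requested (write-combining or
 * non-cached) page protection.
 */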
1018static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
1019		    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
1020		    struct mlx5_ib_ucontext *context)
1021{
1022	unsigned long idx;
1023	phys_addr_t pfn;
1024
1025	if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
1026		mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
1027			     (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
1028		return -EINVAL;
1029	}
1030
1031	idx = get_index(vma->vm_pgoff);
1032	if (idx >= uuari->num_uars) {
1033		mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
1034			     idx, uuari->num_uars);
1035		return -EINVAL;
1036	}
1037
1038	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1039	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1040		    (unsigned long long)pfn);
1041
1042	vma->vm_page_prot = prot;
1043	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1044			       PAGE_SIZE, vma->vm_page_prot)) {
1045		mlx5_ib_err(dev, "io remap failed\n");
1046		return -EAGAIN;
1047	}
1048
1049	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
1050		    (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
1051
1052	return 0;
1053}
1054
1055static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1056{
1057	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1058	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1059	struct mlx5_uuar_info *uuari = &context->uuari;
1060	unsigned long command;
1061
1062	command = get_command(vma->vm_pgoff);
1063	switch (command) {
1064	case MLX5_IB_MMAP_REGULAR_PAGE:
1065		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1066				true,
1067				uuari, dev, context);
1068
1069		break;
1070
1071	case MLX5_IB_MMAP_WC_PAGE:
1072		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
1073				true, uuari, dev, context);
1074		break;
1075
1076	case MLX5_IB_MMAP_NC_PAGE:
1077		return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
1078				false, uuari, dev, context);
1079		break;
1080
1081	default:
1082		return -EINVAL;
1083	}
1084
1085	return 0;
1086}
1087
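/*
 * Create a physical-address memory key with local read permission that
 * spans the whole address space (MLX5_MKEY_LEN64); it is used as the
 * pa_lkey of kernel protection domains.
 */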
1088static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
1089{
1090	struct mlx5_create_mkey_mbox_in *in;
1091	struct mlx5_mkey_seg *seg;
1092	struct mlx5_core_mr mr;
1093	int err;
1094
1095	in = kzalloc(sizeof(*in), GFP_KERNEL);
1096	if (!in)
1097		return -ENOMEM;
1098
1099	seg = &in->seg;
1100	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
1101	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
1102	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1103	seg->start_addr = 0;
1104
1105	err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
1106				    NULL, NULL, NULL);
1107	if (err) {
1108		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
1109		goto err_in;
1110	}
1111
1112	kfree(in);
1113	*key = mr.key;
1114
1115	return 0;
1116
1117err_in:
1118	kfree(in);
1119
1120	return err;
1121}
1122
1123static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1124{
1125	struct mlx5_core_mr mr;
1126	int err;
1127
1128	memset(&mr, 0, sizeof(mr));
1129	mr.key = key;
1130	err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1131	if (err)
1132		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
1133}
1134
1135static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1136				      struct ib_ucontext *context,
1137				      struct ib_udata *udata)
1138{
1139	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1140	struct mlx5_ib_alloc_pd_resp resp;
1141	struct mlx5_ib_pd *pd;
1142	int err;
1143
1144	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1145	if (!pd)
1146		return ERR_PTR(-ENOMEM);
1147
1148	err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1149	if (err) {
1150		mlx5_ib_warn(dev, "pd alloc failed\n");
1151		kfree(pd);
1152		return ERR_PTR(err);
1153	}
1154
1155	if (context) {
1156		resp.pdn = pd->pdn;
1157		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1158			mlx5_ib_err(dev, "copy failed\n");
1159			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1160			kfree(pd);
1161			return ERR_PTR(-EFAULT);
1162		}
1163	} else {
1164		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1165		if (err) {
1166			mlx5_ib_err(dev, "alloc mkey failed\n");
1167			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1168			kfree(pd);
1169			return ERR_PTR(err);
1170		}
1171	}
1172
1173	return &pd->ibpd;
1174}
1175
1176static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1177{
1178	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1179	struct mlx5_ib_pd *mpd = to_mpd(pd);
1180
1181	if (!pd->uobject)
1182		free_pa_mkey(mdev, mpd->pa_lkey);
1183
1184	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1185	kfree(mpd);
1186
1187	return 0;
1188}
1189
1190static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1191{
1192	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1193	int err;
1194
1195	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1196		err = -EOPNOTSUPP;
1197	else
1198		err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1199	if (err)
1200		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1201			     ibqp->qp_num, gid->raw);
1202
1203	return err;
1204}
1205
1206static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1207{
1208	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1209	int err;
1210
1211	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1212		err = -EOPNOTSUPP;
1213	else
1214		err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1215	if (err)
1216		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1217			     ibqp->qp_num, gid->raw);
1218
1219	return err;
1220}
1221
1222static int init_node_data(struct mlx5_ib_dev *dev)
1223{
1224	int err;
1225
1226	err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1227	if (err)
1228		return err;
1229
1230	return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1231}
1232
1233static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1234			     char *buf)
1235{
1236	struct mlx5_ib_dev *dev =
1237		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1238
1239	return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
1240}
1241
1242static ssize_t show_reg_pages(struct device *device,
1243			      struct device_attribute *attr, char *buf)
1244{
1245	struct mlx5_ib_dev *dev =
1246		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1247
1248	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1249}
1250
1251static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1252			char *buf)
1253{
1254	struct mlx5_ib_dev *dev =
1255		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1256	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1257}
1258
1259static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1260			   char *buf)
1261{
1262	struct mlx5_ib_dev *dev =
1263		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1264	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1265		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1266}
1267
1268static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1269			char *buf)
1270{
1271	struct mlx5_ib_dev *dev =
1272		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1273	return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
1274}
1275
1276static ssize_t show_board(struct device *device, struct device_attribute *attr,
1277			  char *buf)
1278{
1279	struct mlx5_ib_dev *dev =
1280		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1281	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1282		       dev->mdev->board_id);
1283}
1284
1285static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1286static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1287static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1288static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1289static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1290static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1291
1292static struct device_attribute *mlx5_class_attributes[] = {
1293	&dev_attr_hw_rev,
1294	&dev_attr_fw_ver,
1295	&dev_attr_hca_type,
1296	&dev_attr_board_id,
1297	&dev_attr_fw_pages,
1298	&dev_attr_reg_pages,
1299};
1300
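/*
 * On a fatal device error, walk all QPs of this ibdev and, for every
 * send or receive queue with outstanding work, collect its CQ and
 * invoke the completion handler so consumers see the flushed work
 * requests.
 */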
1301static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
1302{
1303	struct mlx5_ib_qp *mqp;
1304	struct mlx5_ib_cq *send_mcq, *recv_mcq;
1305	struct mlx5_core_cq *mcq;
1306	struct list_head cq_armed_list;
1307	unsigned long flags_qp;
1308	unsigned long flags_cq;
1309	unsigned long flags;
1310
1311	mlx5_ib_warn(ibdev, " started\n");
1312	INIT_LIST_HEAD(&cq_armed_list);
1313
1314	/* Go over the QP list of this ibdev, synchronized against QP create/destroy. */
1315	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
1316	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
1317		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
1318		if (mqp->sq.tail != mqp->sq.head) {
1319			send_mcq = to_mcq(mqp->ibqp.send_cq);
1320			spin_lock_irqsave(&send_mcq->lock, flags_cq);
1321			if (send_mcq->mcq.comp &&
1322			    mqp->ibqp.send_cq->comp_handler) {
1323				if (!send_mcq->mcq.reset_notify_added) {
1324					send_mcq->mcq.reset_notify_added = 1;
1325					list_add_tail(&send_mcq->mcq.reset_notify,
1326						      &cq_armed_list);
1327				}
1328			}
1329			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
1330		}
1331		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
1332		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
1333		/* no handling is needed for SRQ */
1334		if (!mqp->ibqp.srq) {
1335			if (mqp->rq.tail != mqp->rq.head) {
1336				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
1337				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
1338				if (recv_mcq->mcq.comp &&
1339				    mqp->ibqp.recv_cq->comp_handler) {
1340					if (!recv_mcq->mcq.reset_notify_added) {
1341						recv_mcq->mcq.reset_notify_added = 1;
1342						list_add_tail(&recv_mcq->mcq.reset_notify,
1343							      &cq_armed_list);
1344					}
1345				}
1346				spin_unlock_irqrestore(&recv_mcq->lock,
1347						       flags_cq);
1348			}
1349		}
1350		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
1351	}
1352	/* At this point all in-flight post sends have been seen under the
1353	 * locks taken above; now invoke the completion handler of each collected CQ.
1354	 */
1355	list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
1356		mcq->comp(mcq);
1357	}
1358	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
1359	mlx5_ib_warn(ibdev, " ended\n");
1360	return;
1361}
1362
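/*
 * Translate asynchronous mlx5 core events into IB events and dispatch
 * them to ibcore. A fatal system error additionally marks the device
 * inactive and runs the internal error flow above.
 */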
1363static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1364			  enum mlx5_dev_event event, unsigned long param)
1365{
1366	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1367	struct ib_event ibev;
1368
1369	u8 port = 0;
1370
1371	switch (event) {
1372	case MLX5_DEV_EVENT_SYS_ERROR:
1373		ibdev->ib_active = false;
1374		ibev.event = IB_EVENT_DEVICE_FATAL;
1375		mlx5_ib_handle_internal_error(ibdev);
1376		break;
1377
1378	case MLX5_DEV_EVENT_PORT_UP:
1379		ibev.event = IB_EVENT_PORT_ACTIVE;
1380		port = (u8)param;
1381		break;
1382
1383	case MLX5_DEV_EVENT_PORT_DOWN:
1384	case MLX5_DEV_EVENT_PORT_INITIALIZED:
1385		ibev.event = IB_EVENT_PORT_ERR;
1386		port = (u8)param;
1387		break;
1388
1389	case MLX5_DEV_EVENT_LID_CHANGE:
1390		ibev.event = IB_EVENT_LID_CHANGE;
1391		port = (u8)param;
1392		break;
1393
1394	case MLX5_DEV_EVENT_PKEY_CHANGE:
1395		ibev.event = IB_EVENT_PKEY_CHANGE;
1396		port = (u8)param;
1397		break;
1398
1399	case MLX5_DEV_EVENT_GUID_CHANGE:
1400		ibev.event = IB_EVENT_GID_CHANGE;
1401		port = (u8)param;
1402		break;
1403
1404	case MLX5_DEV_EVENT_CLIENT_REREG:
1405		ibev.event = IB_EVENT_CLIENT_REREGISTER;
1406		port = (u8)param;
1407		break;
1408
1409	default:
1410		break;
1411	}
1412
1413	ibev.device	      = &ibdev->ib_dev;
1414	ibev.element.port_num = port;
1415
1416	if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1417	    (port < 1 || port > ibdev->num_ports)) {
1418		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1419		return;
1420	}
1421
1422	if (ibdev->ib_active)
1423		ib_dispatch_event(&ibev);
1424}
1425
1426static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1427{
1428	int port;
1429
1430	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1431		mlx5_query_ext_port_caps(dev, port);
1432}
1433
1434static void config_atomic_responder(struct mlx5_ib_dev *dev,
1435				    struct ib_device_attr *props)
1436{
1437	enum ib_atomic_cap cap = props->atomic_cap;
1438
1439#if 0
1440	if (cap == IB_ATOMIC_HCA ||
1441	    cap == IB_ATOMIC_GLOB)
1442#endif
1443		dev->enable_atomic_resp = 1;
1444
1445	dev->atomic_cap = cap;
1446}
1447
1448enum mlx5_addr_align {
1449	MLX5_ADDR_ALIGN_0	= 0,
1450	MLX5_ADDR_ALIGN_64	= 64,
1451	MLX5_ADDR_ALIGN_128	= 128,
1452};
1453
1454static int get_port_caps(struct mlx5_ib_dev *dev)
1455{
1456	struct ib_device_attr *dprops = NULL;
1457	struct ib_port_attr *pprops = NULL;
1458	int err = -ENOMEM;
1459	int port;
1460
1461	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1462	if (!pprops)
1463		goto out;
1464
1465	dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1466	if (!dprops)
1467		goto out;
1468
1469	err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1470	if (err) {
1471		mlx5_ib_warn(dev, "query_device failed %d\n", err);
1472		goto out;
1473	}
1474	config_atomic_responder(dev, dprops);
1475
1476	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1477		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1478		if (err) {
1479			mlx5_ib_warn(dev, "query_port %d failed %d\n",
1480				     port, err);
1481			break;
1482		}
1483		dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1484		dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1485		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1486			    dprops->max_pkeys, pprops->gid_tbl_len);
1487	}
1488
1489out:
1490	kfree(pprops);
1491	kfree(dprops);
1492
1493	return err;
1494}
1495
1496static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1497{
1498	int err;
1499
1500	err = mlx5_mr_cache_cleanup(dev);
1501	if (err)
1502		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1503
1504	ib_dereg_mr(dev->umrc.mr);
1505	ib_dealloc_pd(dev->umrc.pd);
1506}
1507
1508enum {
1509	MAX_UMR_WR = 128,
1510};
1511
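/*
 * Set up the resources used for user-mode memory registration (UMR):
 * a kernel PD, a local DMA MR and the MR cache.
 */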
1512static int create_umr_res(struct mlx5_ib_dev *dev)
1513{
1514	struct ib_pd *pd;
1515	struct ib_mr *mr;
1516	int ret;
1517
1518	pd = ib_alloc_pd(&dev->ib_dev);
1519	if (IS_ERR(pd)) {
1520		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1521		ret = PTR_ERR(pd);
1522		goto error_0;
1523	}
1524
1525	mr = ib_get_dma_mr(pd,  IB_ACCESS_LOCAL_WRITE);
1526	if (IS_ERR(mr)) {
1527		mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1528		ret = PTR_ERR(mr);
1529		goto error_1;
1530	}
1531
1532	dev->umrc.mr = mr;
1533	dev->umrc.pd = pd;
1534
1535	ret = mlx5_mr_cache_init(dev);
1536	if (ret) {
1537		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1538		goto error_4;
1539	}
1540
1541	return 0;
1542
1543error_4:
1544	ib_dereg_mr(mr);
1545error_1:
1546	ib_dealloc_pd(pd);
1547error_0:
1548	return ret;
1549}
1550
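/*
 * Allocate the internal device resources used by kernel consumers and
 * XRC: a PD (p0), a CQ (c0), two XRC domains (x0, x1), an XRC SRQ (s0)
 * and a basic SRQ (s1).
 */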
1551static int create_dev_resources(struct mlx5_ib_resources *devr)
1552{
1553	struct ib_srq_init_attr attr;
1554	struct mlx5_ib_dev *dev;
1555	int ret = 0;
1556	struct ib_cq_init_attr cq_attr = { .cqe = 1 };
1557
1558	dev = container_of(devr, struct mlx5_ib_dev, devr);
1559
1560	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1561	if (IS_ERR(devr->p0)) {
1562		ret = PTR_ERR(devr->p0);
1563		goto error0;
1564	}
1565	devr->p0->device  = &dev->ib_dev;
1566	devr->p0->uobject = NULL;
1567	atomic_set(&devr->p0->usecnt, 0);
1568
1569	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, &cq_attr, NULL, NULL);
1570	if (IS_ERR(devr->c0)) {
1571		ret = PTR_ERR(devr->c0);
1572		goto error1;
1573	}
1574	devr->c0->device        = &dev->ib_dev;
1575	devr->c0->uobject       = NULL;
1576	devr->c0->comp_handler  = NULL;
1577	devr->c0->event_handler = NULL;
1578	devr->c0->cq_context    = NULL;
1579	atomic_set(&devr->c0->usecnt, 0);
1580
1581	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1582	if (IS_ERR(devr->x0)) {
1583		ret = PTR_ERR(devr->x0);
1584		goto error2;
1585	}
1586	devr->x0->device = &dev->ib_dev;
1587	devr->x0->inode = NULL;
1588	atomic_set(&devr->x0->usecnt, 0);
1589	mutex_init(&devr->x0->tgt_qp_mutex);
1590	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1591
1592	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1593	if (IS_ERR(devr->x1)) {
1594		ret = PTR_ERR(devr->x1);
1595		goto error3;
1596	}
1597	devr->x1->device = &dev->ib_dev;
1598	devr->x1->inode = NULL;
1599	atomic_set(&devr->x1->usecnt, 0);
1600	mutex_init(&devr->x1->tgt_qp_mutex);
1601	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1602
1603	memset(&attr, 0, sizeof(attr));
1604	attr.attr.max_sge = 1;
1605	attr.attr.max_wr = 1;
1606	attr.srq_type = IB_SRQT_XRC;
1607	attr.ext.xrc.cq = devr->c0;
1608	attr.ext.xrc.xrcd = devr->x0;
1609
1610	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1611	if (IS_ERR(devr->s0)) {
1612		ret = PTR_ERR(devr->s0);
1613		goto error4;
1614	}
1615	devr->s0->device	= &dev->ib_dev;
1616	devr->s0->pd		= devr->p0;
1617	devr->s0->uobject       = NULL;
1618	devr->s0->event_handler = NULL;
1619	devr->s0->srq_context   = NULL;
1620	devr->s0->srq_type      = IB_SRQT_XRC;
1621	devr->s0->ext.xrc.xrcd  = devr->x0;
1622	devr->s0->ext.xrc.cq	= devr->c0;
1623	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1624	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1625	atomic_inc(&devr->p0->usecnt);
1626	atomic_set(&devr->s0->usecnt, 0);
1627
1628	memset(&attr, 0, sizeof(attr));
1629	attr.attr.max_sge = 1;
1630	attr.attr.max_wr = 1;
1631	attr.srq_type = IB_SRQT_BASIC;
1632	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1633	if (IS_ERR(devr->s1)) {
1634		ret = PTR_ERR(devr->s1);
1635		goto error5;
1636	}
1637	devr->s1->device	= &dev->ib_dev;
1638	devr->s1->pd		= devr->p0;
1639	devr->s1->uobject       = NULL;
1640	devr->s1->event_handler = NULL;
1641	devr->s1->srq_context   = NULL;
1642	devr->s1->srq_type      = IB_SRQT_BASIC;
1643	devr->s1->ext.xrc.cq	= devr->c0;
1644	atomic_inc(&devr->p0->usecnt);
1645	atomic_set(&devr->s1->usecnt, 0);
1646
1647	return 0;
1648
1649error5:
1650	mlx5_ib_destroy_srq(devr->s0);
1651error4:
1652	mlx5_ib_dealloc_xrcd(devr->x1);
1653error3:
1654	mlx5_ib_dealloc_xrcd(devr->x0);
1655error2:
1656	mlx5_ib_destroy_cq(devr->c0);
1657error1:
1658	mlx5_ib_dealloc_pd(devr->p0);
1659error0:
1660	return ret;
1661}
1662
1663static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1664{
1665	mlx5_ib_destroy_srq(devr->s1);
1666	mlx5_ib_destroy_srq(devr->s0);
1667	mlx5_ib_dealloc_xrcd(devr->x0);
1668	mlx5_ib_dealloc_xrcd(devr->x1);
1669	mlx5_ib_destroy_cq(devr->c0);
1670	mlx5_ib_dealloc_pd(devr->p0);
1671}
1672
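/*
 * Allocate and DMA-map a buffer (4KB per port) and enable the firmware
 * DC CNAK tracer on it; on any failure the tracer is simply left
 * disabled.
 */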
1673static void enable_dc_tracer(struct mlx5_ib_dev *dev)
1674{
1675	struct device *device = dev->ib_dev.dma_device;
1676	struct mlx5_dc_tracer *dct = &dev->dctr;
1677	int order;
1678	void *tmp;
1679	int size;
1680	int err;
1681
1682	size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1683	if (size <= PAGE_SIZE)
1684		order = 0;
1685	else
1686		order = 1;
1687
1688	dct->pg = alloc_pages(GFP_KERNEL, order);
1689	if (!dct->pg) {
1690		mlx5_ib_err(dev, "failed to allocate pages (order %d)\n", order);
1691		return;
1692	}
1693
1694	tmp = page_address(dct->pg);
1695	memset(tmp, 0xff, size);
1696
1697	dct->size = size;
1698	dct->order = order;
1699	dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1700	if (dma_mapping_error(device, dct->dma)) {
1701		mlx5_ib_err(dev, "dma mapping error\n");
1702		goto map_err;
1703	}
1704
1705	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1706	if (err) {
1707		mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1708		goto cmd_err;
1709	}
1710
1711	return;
1712
1713cmd_err:
1714	dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1715map_err:
1716	__free_pages(dct->pg, dct->order);
1717	dct->pg = NULL;
1718}
1719
1720static void disable_dc_tracer(struct mlx5_ib_dev *dev)
1721{
1722	struct device *device = dev->ib_dev.dma_device;
1723	struct mlx5_dc_tracer *dct = &dev->dctr;
1724	int err;
1725
1726	if (!dct->pg)
1727		return;
1728
1729	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1730	if (err) {
1731		mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1732		return;
1733	}
1734
1735	dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1736	__free_pages(dct->pg, dct->order);
1737	dct->pg = NULL;
1738}
1739
1740enum {
1741	MLX5_DC_CNAK_SIZE		= 128,
1742	MLX5_NUM_BUF_IN_PAGE		= PAGE_SIZE / MLX5_DC_CNAK_SIZE,
1743	MLX5_CNAK_TX_CQ_SIGNAL_FACTOR	= 128,
1744	MLX5_DC_CNAK_SL			= 0,
1745	MLX5_DC_CNAK_VL			= 0,
1746};
1747
1748static int init_dc_improvements(struct mlx5_ib_dev *dev)
1749{
1750	if (!mlx5_core_is_pf(dev->mdev))
1751		return 0;
1752
1753	if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1754		return 0;
1755
1756	enable_dc_tracer(dev);
1757
1758	return 0;
1759}
1760
1761static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
1762{
1763
1764	disable_dc_tracer(dev);
1765}
1766
1767static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1768{
1769	mlx5_vport_dealloc_q_counter(dev->mdev,
1770				     MLX5_INTERFACE_PROTOCOL_IB,
1771				     dev->port[port_num].q_cnt_id);
1772	dev->port[port_num].q_cnt_id = 0;
1773}
1774
1775static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1776{
1777	unsigned int i;
1778
1779	for (i = 0; i < dev->num_ports; i++)
1780		mlx5_ib_dealloc_q_port_counter(dev, i);
1781}
1782
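/*
 * Allocate one queue counter set per port; on failure roll back the
 * counters allocated so far.
 */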
1783static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1784{
1785	int i;
1786	int ret;
1787
1788	for (i = 0; i < dev->num_ports; i++) {
1789		ret = mlx5_vport_alloc_q_counter(dev->mdev,
1790						 MLX5_INTERFACE_PROTOCOL_IB,
1791						 &dev->port[i].q_cnt_id);
1792		if (ret) {
1793			mlx5_ib_warn(dev,
1794				     "couldn't allocate queue counter for port %d\n",
1795				     i + 1);
1796			goto dealloc_counters;
1797		}
1798	}
1799
1800	return 0;
1801
1802dealloc_counters:
1803	while (--i >= 0)
1804		mlx5_ib_dealloc_q_port_counter(dev, i);
1805
1806	return ret;
1807}
1808
1809struct port_attribute {
1810	struct attribute attr;
1811	ssize_t (*show)(struct mlx5_ib_port *,
1812			struct port_attribute *, char *buf);
1813	ssize_t (*store)(struct mlx5_ib_port *,
1814			 struct port_attribute *,
1815			 const char *buf, size_t count);
1816};
1817
1818struct port_counter_attribute {
1819	struct port_attribute	attr;
1820	size_t			offset;
1821};
1822
1823static ssize_t port_attr_show(struct kobject *kobj,
1824			      struct attribute *attr, char *buf)
1825{
1826	struct port_attribute *port_attr =
1827		container_of(attr, struct port_attribute, attr);
1828	struct mlx5_ib_port_sysfs_group *p =
1829		container_of(kobj, struct mlx5_ib_port_sysfs_group,
1830			     kobj);
1831	struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1832						    group);
1833
1834	if (!port_attr->show)
1835		return -EIO;
1836
1837	return port_attr->show(mibport, port_attr, buf);
1838}
1839
1840static ssize_t show_port_counter(struct mlx5_ib_port *p,
1841				 struct port_attribute *port_attr,
1842				 char *buf)
1843{
1844	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
1845	struct port_counter_attribute *counter_attr =
1846		container_of(port_attr, struct port_counter_attribute, attr);
1847	void *out;
1848	int ret;
1849
1850	out = mlx5_vzalloc(outlen);
1851	if (!out)
1852		return -ENOMEM;
1853
1854	ret = mlx5_vport_query_q_counter(p->dev->mdev,
1855					 p->q_cnt_id, 0,
1856					 out, outlen);
1857	if (ret)
1858		goto free;
1859
1860	ret = sprintf(buf, "%d\n",
1861		      be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
1862
1863free:
1864	kfree(out);
1865	return ret;
1866}
1867
1868#define PORT_COUNTER_ATTR(_name)					\
1869struct port_counter_attribute port_counter_attr_##_name = {		\
1870	.attr  = __ATTR(_name, S_IRUGO, show_port_counter, NULL),	\
1871	.offset = MLX5_BYTE_OFF(query_q_counter_out, _name)		\
1872}
1873
1874static PORT_COUNTER_ATTR(rx_write_requests);
1875static PORT_COUNTER_ATTR(rx_read_requests);
1876static PORT_COUNTER_ATTR(rx_atomic_requests);
1877static PORT_COUNTER_ATTR(rx_dct_connect);
1878static PORT_COUNTER_ATTR(out_of_buffer);
1879static PORT_COUNTER_ATTR(out_of_sequence);
1880static PORT_COUNTER_ATTR(duplicate_request);
1881static PORT_COUNTER_ATTR(rnr_nak_retry_err);
1882static PORT_COUNTER_ATTR(packet_seq_err);
1883static PORT_COUNTER_ATTR(implied_nak_seq_err);
1884static PORT_COUNTER_ATTR(local_ack_timeout_err);
1885
1886static struct attribute *counter_attrs[] = {
1887	&port_counter_attr_rx_write_requests.attr.attr,
1888	&port_counter_attr_rx_read_requests.attr.attr,
1889	&port_counter_attr_rx_atomic_requests.attr.attr,
1890	&port_counter_attr_rx_dct_connect.attr.attr,
1891	&port_counter_attr_out_of_buffer.attr.attr,
1892	&port_counter_attr_out_of_sequence.attr.attr,
1893	&port_counter_attr_duplicate_request.attr.attr,
1894	&port_counter_attr_rnr_nak_retry_err.attr.attr,
1895	&port_counter_attr_packet_seq_err.attr.attr,
1896	&port_counter_attr_implied_nak_seq_err.attr.attr,
1897	&port_counter_attr_local_ack_timeout_err.attr.attr,
1898	NULL
1899};
1900
1901static struct attribute_group port_counters_group = {
1902	.name  = "counters",
1903	.attrs  = counter_attrs
1904};
1905
1906static const struct sysfs_ops port_sysfs_ops = {
1907	.show = port_attr_show
1908};
1909
1910static struct kobj_type port_type = {
1911	.sysfs_ops     = &port_sysfs_ops,
1912};
1913
1914static int add_port_attrs(struct mlx5_ib_dev *dev,
1915			  struct kobject *parent,
1916			  struct mlx5_ib_port_sysfs_group *port,
1917			  u8 port_num)
1918{
1919	int ret;
1920
1921	ret = kobject_init_and_add(&port->kobj, &port_type,
1922				   parent,
1923				   "%d", port_num);
1924	if (ret)
1925		return ret;
1926
1927	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1928	    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
1929		ret = sysfs_create_group(&port->kobj, &port_counters_group);
1930		if (ret)
1931			goto put_kobj;
1932	}
1933
1934	port->enabled = true;
1935	return ret;
1936
1937put_kobj:
1938	kobject_put(&port->kobj);
1939	return ret;
1940}
1941
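/*
 * Undo add_port_attrs() for every enabled port and drop the shared
 * "mlx5_ports" parent kobject.
 */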
1942static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
1943				unsigned int num_ports)
1944{
1945	unsigned int i;
1946
1947	for (i = 0; i < num_ports; i++) {
1948		struct mlx5_ib_port_sysfs_group *port =
1949			&dev->port[i].group;
1950
1951		if (!port->enabled)
1952			continue;
1953
1954		if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
1955		    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
1956			sysfs_remove_group(&port->kobj,
1957					   &port_counters_group);
1958		kobject_put(&port->kobj);
1959		port->enabled = false;
1960	}
1961
1962	if (dev->ports_parent) {
1963		kobject_put(dev->ports_parent);
1964		dev->ports_parent = NULL;
1965	}
1966}
1967
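/*
 * Create the "mlx5_ports" directory under the IB device's sysfs node and
 * populate one numbered entry per physical port.
 */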
1968static int create_port_attrs(struct mlx5_ib_dev *dev)
1969{
1970	int ret = 0;
1971	unsigned int i = 0;
1972	struct device *device = &dev->ib_dev.dev;
1973
1974	dev->ports_parent = kobject_create_and_add("mlx5_ports",
1975						   &device->kobj);
1976	if (!dev->ports_parent)
1977		return -ENOMEM;
1978
1979	for (i = 0; i < dev->num_ports; i++) {
1980		ret = add_port_attrs(dev,
1981				     dev->ports_parent,
1982				     &dev->port[i].group,
1983				     i + 1);
1984
1985		if (ret)
1986			goto _destroy_ports_attrs;
1987	}
1988
1989	return 0;
1990
1991_destroy_ports_attrs:
1992	destroy_ports_attrs(dev, i);
1993	return ret;
1994}
1995
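/*
 * mlx5_core attach callback: allocate and register an IB device for a
 * freshly probed core device, wiring up the verbs callbacks, Q counters,
 * per-port sysfs attributes and the per-port update kthreads.
 */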
1996static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
1997{
1998	struct mlx5_ib_dev *dev;
1999	int err;
2000	int i;
2001
2002	printk_once(KERN_INFO "%s", mlx5_version);
2003
2004	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
2005	if (!dev)
2006		return NULL;
2007
2008	dev->mdev = mdev;
2009
2010	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
2011			     GFP_KERNEL);
2012	if (!dev->port)
2013		goto err_dealloc;
2014
2015	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2016		dev->port[i].dev = dev;
2017		dev->port[i].port_num = i;
2018		dev->port[i].port_gone = 0;
2019		memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
2020	}
2021
2022	err = get_port_caps(dev);
2023	if (err)
2024		goto err_free_port;
2025
2026	if (mlx5_use_mad_ifc(dev))
2027		get_ext_port_caps(dev);
2028
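	/*
	 * Ethernet (RoCE) ports require the "roce" capability; enable RoCE
	 * on the NIC vport before exposing the device.
	 */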
2029	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2030	    IB_LINK_LAYER_ETHERNET) {
2031		if (MLX5_CAP_GEN(mdev, roce)) {
2032			err = mlx5_nic_vport_enable_roce(mdev);
2033			if (err)
2034				goto err_free_port;
2035		} else {
2036			goto err_free_port;
2037		}
2038	}
2039
2040	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);
2041
2042	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
2043	dev->ib_dev.owner		= THIS_MODULE;
2044	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
2045	dev->ib_dev.local_dma_lkey	= mdev->special_contexts.resd_lkey;
2046	dev->num_ports		= MLX5_CAP_GEN(mdev, num_ports);
2047	dev->ib_dev.phys_port_cnt     = dev->num_ports;
2048	dev->ib_dev.num_comp_vectors    =
2049		dev->mdev->priv.eq_table.num_comp_vectors;
2050	dev->ib_dev.dma_device	= &mdev->pdev->dev;
2051
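	/* Advertise the uverbs ABI version and the commands implemented here. */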
2052	dev->ib_dev.uverbs_abi_ver	= MLX5_IB_UVERBS_ABI_VERSION;
2053	dev->ib_dev.uverbs_cmd_mask	=
2054		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
2055		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
2056		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
2057		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
2058		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
2059		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
2060		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
2061		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
2062		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
2063		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
2064		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
2065		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
2066		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
2067		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
2068		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
2069		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
2070		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
2071		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
2072		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
2073		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
2074		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
2075		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
2076		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
2077
2078	dev->ib_dev.query_device	= mlx5_ib_query_device;
2079	dev->ib_dev.query_port		= mlx5_ib_query_port;
2080	dev->ib_dev.get_link_layer	= mlx5_ib_port_link_layer;
2081	dev->ib_dev.get_netdev		= mlx5_ib_get_netdev;
2082	dev->ib_dev.query_gid		= mlx5_ib_query_gid;
2083	dev->ib_dev.query_pkey		= mlx5_ib_query_pkey;
2084	dev->ib_dev.modify_device	= mlx5_ib_modify_device;
2085	dev->ib_dev.modify_port		= mlx5_ib_modify_port;
2086	dev->ib_dev.alloc_ucontext	= mlx5_ib_alloc_ucontext;
2087	dev->ib_dev.dealloc_ucontext	= mlx5_ib_dealloc_ucontext;
2088	dev->ib_dev.mmap		= mlx5_ib_mmap;
2089	dev->ib_dev.alloc_pd		= mlx5_ib_alloc_pd;
2090	dev->ib_dev.dealloc_pd		= mlx5_ib_dealloc_pd;
2091	dev->ib_dev.create_ah		= mlx5_ib_create_ah;
2092	dev->ib_dev.query_ah		= mlx5_ib_query_ah;
2093	dev->ib_dev.destroy_ah		= mlx5_ib_destroy_ah;
2094	dev->ib_dev.create_srq		= mlx5_ib_create_srq;
2095	dev->ib_dev.modify_srq		= mlx5_ib_modify_srq;
2096	dev->ib_dev.query_srq		= mlx5_ib_query_srq;
2097	dev->ib_dev.destroy_srq		= mlx5_ib_destroy_srq;
2098	dev->ib_dev.post_srq_recv	= mlx5_ib_post_srq_recv;
2099	dev->ib_dev.create_qp		= mlx5_ib_create_qp;
2100	dev->ib_dev.modify_qp		= mlx5_ib_modify_qp;
2101	dev->ib_dev.query_qp		= mlx5_ib_query_qp;
2102	dev->ib_dev.destroy_qp		= mlx5_ib_destroy_qp;
2103	dev->ib_dev.post_send		= mlx5_ib_post_send;
2104	dev->ib_dev.post_recv		= mlx5_ib_post_recv;
2105	dev->ib_dev.create_cq		= mlx5_ib_create_cq;
2106	dev->ib_dev.modify_cq		= mlx5_ib_modify_cq;
2107	dev->ib_dev.resize_cq		= mlx5_ib_resize_cq;
2108	dev->ib_dev.destroy_cq		= mlx5_ib_destroy_cq;
2109	dev->ib_dev.poll_cq		= mlx5_ib_poll_cq;
2110	dev->ib_dev.req_notify_cq	= mlx5_ib_arm_cq;
2111	dev->ib_dev.get_dma_mr		= mlx5_ib_get_dma_mr;
2112	dev->ib_dev.reg_user_mr		= mlx5_ib_reg_user_mr;
2113	dev->ib_dev.reg_phys_mr		= mlx5_ib_reg_phys_mr;
2114	dev->ib_dev.dereg_mr		= mlx5_ib_dereg_mr;
2115	dev->ib_dev.attach_mcast	= mlx5_ib_mcg_attach;
2116	dev->ib_dev.detach_mcast	= mlx5_ib_mcg_detach;
2117	dev->ib_dev.process_mad		= mlx5_ib_process_mad;
2118	dev->ib_dev.alloc_fast_reg_mr	= mlx5_ib_alloc_fast_reg_mr;
2119	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
2120	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
2121
2122	if (MLX5_CAP_GEN(mdev, xrc)) {
2123		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
2124		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
2125		dev->ib_dev.uverbs_cmd_mask |=
2126			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2127			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2128	}
2129
2130	err = init_node_data(dev);
2131	if (err)
2132		goto err_disable_roce;
2133
2134	mutex_init(&dev->cap_mask_mutex);
2135	INIT_LIST_HEAD(&dev->qp_list);
2136	spin_lock_init(&dev->reset_flow_resource_lock);
2137
2138	err = create_dev_resources(&dev->devr);
2139	if (err)
2140		goto err_disable_roce;
2141
2143	err = mlx5_ib_alloc_q_counters(dev);
2144	if (err)
2145		goto err_odp;
2146
2147	err = ib_register_device(&dev->ib_dev, NULL);
2148	if (err)
2149		goto err_q_cnt;
2150
2151	err = create_umr_res(dev);
2152	if (err)
2153		goto err_dev;
2154
2155	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2156	    MLX5_CAP_PORT_TYPE_IB) {
2157		if (init_dc_improvements(dev))
2158			mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
2159	}
2160
2161	err = create_port_attrs(dev);
2162	if (err)
2163		goto err_dc;
2164
2165	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2166		err = device_create_file(&dev->ib_dev.dev,
2167					 mlx5_class_attributes[i]);
2168		if (err)
2169			goto err_port_attrs;
2170	}
2171
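	/*
	 * Spawn one kernel thread per port running mlx5_ib_roce_port_update();
	 * the "if (1)" only provides a scope for the kproc/kthread locals.
	 */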
2172	if (1) {
2173		struct thread *rl_thread = NULL;
2174		struct proc *rl_proc = NULL;
2175
2176		for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2177			(void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
2178			    RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
2179		}
2180	}
2181
2182	dev->ib_active = true;
2183
2184	return dev;
2185
2186err_port_attrs:
2187	destroy_ports_attrs(dev, dev->num_ports);
2188
2189err_dc:
2190	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2191	    MLX5_CAP_PORT_TYPE_IB)
2192		cleanup_dc_improvements(dev);
2193	destroy_umrc_res(dev);
2194
2195err_dev:
2196	ib_unregister_device(&dev->ib_dev);
2197
2198err_q_cnt:
2199	mlx5_ib_dealloc_q_counters(dev);
2200
2201err_odp:
2202	destroy_dev_resources(&dev->devr);
2203
2204err_disable_roce:
2205	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2206	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2207		mlx5_nic_vport_disable_roce(mdev);
2208err_free_port:
2209	kfree(dev->port);
2210
2211err_dealloc:
2212	ib_dealloc_device((struct ib_device *)dev);
2213
2214	return NULL;
2215}
2216
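/*
 * mlx5_core detach callback: stop the per-port update threads, then tear
 * down the sysfs entries, IB device registration and resources created by
 * mlx5_ib_add().
 */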
2217static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
2218{
2219	struct mlx5_ib_dev *dev = context;
2220	int i;
2221
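	/*
	 * Ask each per-port update thread to exit (port_gone = 1) and poll
	 * once per second until the request is acknowledged (port_gone == 2),
	 * which the thread is expected to do just before it terminates.
	 */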
2222	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
2223		dev->port[i].port_gone = 1;
2224		while (dev->port[i].port_gone != 2)
2225			pause("W", hz);
2226	}
2227
2228	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
2229		device_remove_file(&dev->ib_dev.dev,
2230		    mlx5_class_attributes[i]);
2231	}
2232
2233	destroy_ports_attrs(dev, dev->num_ports);
2234	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
2235	    MLX5_CAP_PORT_TYPE_IB)
2236		cleanup_dc_improvements(dev);
2237	mlx5_ib_dealloc_q_counters(dev);
2238	ib_unregister_device(&dev->ib_dev);
2239	destroy_umrc_res(dev);
2240	destroy_dev_resources(&dev->devr);
2241
2242	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
2243	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
2244		mlx5_nic_vport_disable_roce(mdev);
2245
2246	kfree(dev->port);
2247	ib_dealloc_device(&dev->ib_dev);
2248}
2249
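/* Hooks registered with mlx5_core for device add/remove and async events. */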
2250static struct mlx5_interface mlx5_ib_interface = {
2251	.add            = mlx5_ib_add,
2252	.remove         = mlx5_ib_remove,
2253	.event          = mlx5_ib_event,
2254	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
2255};
2256
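/*
 * Module load: register the interface with mlx5_core and create the
 * driver's single-threaded workqueue.
 */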
2257static int __init mlx5_ib_init(void)
2258{
2259	int err;
2260
2261	if (deprecated_prof_sel != 2)
2262		printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
2263
2264	err = mlx5_register_interface(&mlx5_ib_interface);
2265	if (err)
2266		goto clean_odp;
2267
2268	mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
	if (!mlx5_ib_wq) {
		printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
		/* propagate the failure instead of reporting success */
		err = -ENOMEM;
		goto err_unreg;
	}
2273
2274	return err;
2275
2276err_unreg:
2277	mlx5_unregister_interface(&mlx5_ib_interface);
2278
2279clean_odp:
2280	return err;
2281}
2282
2283static void __exit mlx5_ib_cleanup(void)
2284{
2285	destroy_workqueue(mlx5_ib_wq);
2286	mlx5_unregister_interface(&mlx5_ib_interface);
2287}
2288
2289module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
2290module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);
2291