1/*-
2 * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23 * SUCH DAMAGE.
24 *
25 * $FreeBSD: stable/10/sys/dev/mlx5/mlx5_ib/mlx5_ib_main.c 337748 2018-08-14 11:52:05Z hselasky $
26 */
27
28#include <linux/errno.h>
29#include <linux/pci.h>
30#include <linux/dma-mapping.h>
31#include <linux/slab.h>
32#include <linux/io-mapping.h>
33#include <linux/sched.h>
34#include <linux/netdevice.h>
35#include <linux/etherdevice.h>
36#include <net/ipv6.h>
37#include <linux/list.h>
38#include <dev/mlx5/driver.h>
39#include <dev/mlx5/vport.h>
40#include <asm/pgtable.h>
41#include <linux/fs.h>
42#undef inode
43
44#include <rdma/ib_user_verbs.h>
45#include <rdma/ib_smi.h>
46#include <rdma/ib_umem.h>
47#include "user.h"
48#include "mlx5_ib.h"
49
50#include <sys/unistd.h>
51
52#define DRIVER_NAME "mlx5_ib"
53#define DRIVER_VERSION "3.2.1"
54#define DRIVER_RELDATE	"August 2018"
55
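/*
 * Assumption: the Linux compat layer provides its own MODULE_VERSION
 * macro; drop it here so the native FreeBSD MODULE_VERSION() from
 * <sys/module.h> is the one used below.
 */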
56#undef MODULE_VERSION
57#include <sys/module.h>
58
59MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
60MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
61MODULE_LICENSE("Dual BSD/GPL");
62MODULE_DEPEND(mlx5ib, mlx5, 1, 1, 1);
63MODULE_DEPEND(mlx5ib, ibcore, 1, 1, 1);
64MODULE_VERSION(mlx5ib, 1);
65
66static int deprecated_prof_sel = 2;
67module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
68MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
69
70enum {
71	MLX5_STANDARD_ATOMIC_SIZE = 0x8,
72};
73
74struct workqueue_struct *mlx5_ib_wq;
75
76static char mlx5_version[] =
77	DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
78	DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
79
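/*
 * Derive the IB atomic capabilities from the HCA atomic caps: 8-byte
 * compare-swap and fetch-add must both be supported, and the result is
 * only advertised as IB_ATOMIC_HCA when the device supports the 8-byte
 * requestor endianness mode or the host is big-endian.
 */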
80static void get_atomic_caps(struct mlx5_ib_dev *dev,
81			    struct ib_device_attr *props)
82{
83	int tmp;
84	u8 atomic_operations;
85	u8 atomic_size_qp;
86	u8 atomic_req_endianess;
87
88	atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
89	atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
90	atomic_req_endianess = MLX5_CAP_ATOMIC(dev->mdev,
91					       atomic_req_8B_endianess_mode) ||
92			       !mlx5_host_is_le();
93
94	tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
95	if (((atomic_operations & tmp) == tmp)
96	    && (atomic_size_qp & 8)) {
97		if (atomic_req_endianess) {
98			props->atomic_cap = IB_ATOMIC_HCA;
99		} else {
100			props->atomic_cap = IB_ATOMIC_NONE;
101		}
102	} else {
103		props->atomic_cap = IB_ATOMIC_NONE;
104	}
105
106	tmp = MLX5_ATOMIC_OPS_MASKED_CMP_SWAP | MLX5_ATOMIC_OPS_MASKED_FETCH_ADD;
	if (((atomic_operations & tmp) == tmp)
	    && (atomic_size_qp & 8)) {
		if (atomic_req_endianess) {
			props->masked_atomic_cap = IB_ATOMIC_HCA;
		} else {
			props->masked_atomic_cap = IB_ATOMIC_NONE;
		}
114	} else {
115		props->masked_atomic_cap = IB_ATOMIC_NONE;
116	}
117}
118
119static enum rdma_link_layer
120mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
121{
122	struct mlx5_ib_dev *dev = to_mdev(device);
123
124	switch (MLX5_CAP_GEN(dev->mdev, port_type)) {
125	case MLX5_CAP_PORT_TYPE_IB:
126		return IB_LINK_LAYER_INFINIBAND;
127	case MLX5_CAP_PORT_TYPE_ETH:
128		return IB_LINK_LAYER_ETHERNET;
129	default:
130		return IB_LINK_LAYER_UNSPECIFIED;
131	}
132}
133
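/*
 * With ISSI (interface step sequence ID) 0 the device speaks only the
 * legacy command interface, so device/port attributes are queried
 * through MADs rather than vport commands.
 */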
134static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
135{
136	return !dev->mdev->issi;
137}
138
139enum {
140	MLX5_VPORT_ACCESS_METHOD_MAD,
141	MLX5_VPORT_ACCESS_METHOD_HCA,
142	MLX5_VPORT_ACCESS_METHOD_NIC,
143};
144
145static int mlx5_get_vport_access_method(struct ib_device *ibdev)
146{
147	if (mlx5_use_mad_ifc(to_mdev(ibdev)))
148		return MLX5_VPORT_ACCESS_METHOD_MAD;
149
150	if (mlx5_ib_port_link_layer(ibdev, 1) ==
151	    IB_LINK_LAYER_ETHERNET)
152		return MLX5_VPORT_ACCESS_METHOD_NIC;
153
154	return MLX5_VPORT_ACCESS_METHOD_HCA;
155}
156
157static int mlx5_query_system_image_guid(struct ib_device *ibdev,
158					__be64 *sys_image_guid)
159{
160	struct mlx5_ib_dev *dev = to_mdev(ibdev);
161	struct mlx5_core_dev *mdev = dev->mdev;
162	u64 tmp;
163	int err;
164
165	switch (mlx5_get_vport_access_method(ibdev)) {
166	case MLX5_VPORT_ACCESS_METHOD_MAD:
167		return mlx5_query_system_image_guid_mad_ifc(ibdev,
168							    sys_image_guid);
169
170	case MLX5_VPORT_ACCESS_METHOD_HCA:
171		err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
172		if (!err)
173			*sys_image_guid = cpu_to_be64(tmp);
174		return err;
175
176	case MLX5_VPORT_ACCESS_METHOD_NIC:
177		err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
178		if (!err)
179			*sys_image_guid = cpu_to_be64(tmp);
180		return err;
181
182	default:
183		return -EINVAL;
184	}
185}
186
187static int mlx5_query_max_pkeys(struct ib_device *ibdev,
188				u16 *max_pkeys)
189{
190	struct mlx5_ib_dev *dev = to_mdev(ibdev);
191	struct mlx5_core_dev *mdev = dev->mdev;
192
193	switch (mlx5_get_vport_access_method(ibdev)) {
194	case MLX5_VPORT_ACCESS_METHOD_MAD:
195		return mlx5_query_max_pkeys_mad_ifc(ibdev, max_pkeys);
196
197	case MLX5_VPORT_ACCESS_METHOD_HCA:
198	case MLX5_VPORT_ACCESS_METHOD_NIC:
199		*max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
200						pkey_table_size));
201		return 0;
202
203	default:
204		return -EINVAL;
205	}
206}
207
208static int mlx5_query_vendor_id(struct ib_device *ibdev,
209				u32 *vendor_id)
210{
211	struct mlx5_ib_dev *dev = to_mdev(ibdev);
212
213	switch (mlx5_get_vport_access_method(ibdev)) {
214	case MLX5_VPORT_ACCESS_METHOD_MAD:
215		return mlx5_query_vendor_id_mad_ifc(ibdev, vendor_id);
216
217	case MLX5_VPORT_ACCESS_METHOD_HCA:
218	case MLX5_VPORT_ACCESS_METHOD_NIC:
219		return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
220
221	default:
222		return -EINVAL;
223	}
224}
225
226static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
227				__be64 *node_guid)
228{
229	u64 tmp;
230	int err;
231
232	switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
233	case MLX5_VPORT_ACCESS_METHOD_MAD:
234		return mlx5_query_node_guid_mad_ifc(dev, node_guid);
235
236	case MLX5_VPORT_ACCESS_METHOD_HCA:
237		err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
238		if (!err)
239			*node_guid = cpu_to_be64(tmp);
240		return err;
241
242	case MLX5_VPORT_ACCESS_METHOD_NIC:
243		err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
244		if (!err)
245			*node_guid = cpu_to_be64(tmp);
246		return err;
247
248	default:
249		return -EINVAL;
250	}
251}
252
253struct mlx5_reg_node_desc {
254	u8	desc[64];
255};
256
257static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
258{
259	struct mlx5_reg_node_desc in;
260
261	if (mlx5_use_mad_ifc(dev))
262		return mlx5_query_node_desc_mad_ifc(dev, node_desc);
263
264	memset(&in, 0, sizeof(in));
265
266	return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
267				    sizeof(struct mlx5_reg_node_desc),
268				    MLX5_REG_NODE_DESC, 0, 0);
269}
270
271static int mlx5_ib_query_device(struct ib_device *ibdev,
272				struct ib_device_attr *props)
273{
274	struct mlx5_ib_dev *dev = to_mdev(ibdev);
275	struct mlx5_core_dev *mdev = dev->mdev;
276	int max_sq_desc;
277	int max_rq_sg;
278	int max_sq_sg;
279	int err;
280
281
282	memset(props, 0, sizeof(*props));
283
284	err = mlx5_query_system_image_guid(ibdev,
285					   &props->sys_image_guid);
286	if (err)
287		return err;
288
289	err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
290	if (err)
291		return err;
292
293	err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
294	if (err)
295		return err;
296
297	props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
298		((u64)fw_rev_min(dev->mdev) << 16) |
299		fw_rev_sub(dev->mdev);
300	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
301		IB_DEVICE_PORT_ACTIVE_EVENT		|
302		IB_DEVICE_SYS_IMAGE_GUID		|
303		IB_DEVICE_RC_RNR_NAK_GEN;
304
305	if (MLX5_CAP_GEN(mdev, pkv))
306		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
307	if (MLX5_CAP_GEN(mdev, qkv))
308		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
309	if (MLX5_CAP_GEN(mdev, apm))
310		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
311	props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
312	if (MLX5_CAP_GEN(mdev, xrc))
313		props->device_cap_flags |= IB_DEVICE_XRC;
314	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
315	if (MLX5_CAP_GEN(mdev, block_lb_mc))
316		props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
317
318	props->vendor_part_id	   = mdev->pdev->device;
319	props->hw_ver		   = mdev->pdev->revision;
320
321	props->max_mr_size	   = ~0ull;
322	props->page_size_cap	   = ~(u32)((1ull << MLX5_CAP_GEN(mdev, log_pg_sz)) -1);
323	props->max_qp		   = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
324	props->max_qp_wr	   = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
325	max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
326		     sizeof(struct mlx5_wqe_data_seg);
327	max_sq_desc = min((int)MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
328	max_sq_sg = (max_sq_desc -
329		     sizeof(struct mlx5_wqe_ctrl_seg) -
330		     sizeof(struct mlx5_wqe_raddr_seg)) / sizeof(struct mlx5_wqe_data_seg);
331	props->max_sge = min(max_rq_sg, max_sq_sg);
332	props->max_cq		   = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
333	props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
334	props->max_mr		   = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
335	props->max_pd		   = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
336	props->max_qp_rd_atom	   = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
337	props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
338	props->max_srq		   = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
339	props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
340	props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
341	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
342	props->max_srq_sge	   = max_rq_sg - 1;
343	props->max_fast_reg_page_list_len = (unsigned int)-1;
344	get_atomic_caps(dev, props);
345	props->max_mcast_grp	   = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
346	props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
347	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
348					   props->max_mcast_grp;
349	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
350	props->max_ah		= INT_MAX;
351
352	return 0;
353}
354
355enum mlx5_ib_width {
356	MLX5_IB_WIDTH_1X	= 1 << 0,
357	MLX5_IB_WIDTH_2X	= 1 << 1,
358	MLX5_IB_WIDTH_4X	= 1 << 2,
359	MLX5_IB_WIDTH_8X	= 1 << 3,
360	MLX5_IB_WIDTH_12X	= 1 << 4
361};
362
363static int translate_active_width(struct ib_device *ibdev, u8 active_width,
364				  u8 *ib_width)
365{
366	struct mlx5_ib_dev *dev = to_mdev(ibdev);
367	int err = 0;
368
369	if (active_width & MLX5_IB_WIDTH_1X) {
370		*ib_width = IB_WIDTH_1X;
371	} else if (active_width & MLX5_IB_WIDTH_2X) {
372		mlx5_ib_warn(dev, "active_width %d is not supported by IB spec\n",
373			     (int)active_width);
374		err = -EINVAL;
375	} else if (active_width & MLX5_IB_WIDTH_4X) {
376		*ib_width = IB_WIDTH_4X;
377	} else if (active_width & MLX5_IB_WIDTH_8X) {
378		*ib_width = IB_WIDTH_8X;
379	} else if (active_width & MLX5_IB_WIDTH_12X) {
380		*ib_width = IB_WIDTH_12X;
381	} else {
382		mlx5_ib_dbg(dev, "Invalid active_width %d\n",
383			    (int)active_width);
384		err = -EINVAL;
385	}
386
387	return err;
388}
389
390/*
391 * TODO: Move to IB core
392 */
393enum ib_max_vl_num {
394	__IB_MAX_VL_0		= 1,
395	__IB_MAX_VL_0_1		= 2,
396	__IB_MAX_VL_0_3		= 3,
397	__IB_MAX_VL_0_7		= 4,
398	__IB_MAX_VL_0_14	= 5,
399};
400
401enum mlx5_vl_hw_cap {
402	MLX5_VL_HW_0	= 1,
403	MLX5_VL_HW_0_1	= 2,
404	MLX5_VL_HW_0_2	= 3,
405	MLX5_VL_HW_0_3	= 4,
406	MLX5_VL_HW_0_4	= 5,
407	MLX5_VL_HW_0_5	= 6,
408	MLX5_VL_HW_0_6	= 7,
409	MLX5_VL_HW_0_7	= 8,
410	MLX5_VL_HW_0_14	= 15
411};
412
413static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
414				u8 *max_vl_num)
415{
416	switch (vl_hw_cap) {
417	case MLX5_VL_HW_0:
418		*max_vl_num = __IB_MAX_VL_0;
419		break;
420	case MLX5_VL_HW_0_1:
421		*max_vl_num = __IB_MAX_VL_0_1;
422		break;
423	case MLX5_VL_HW_0_3:
424		*max_vl_num = __IB_MAX_VL_0_3;
425		break;
426	case MLX5_VL_HW_0_7:
427		*max_vl_num = __IB_MAX_VL_0_7;
428		break;
429	case MLX5_VL_HW_0_14:
430		*max_vl_num = __IB_MAX_VL_0_14;
431		break;
432
433	default:
434		return -EINVAL;
435	}
436
437	return 0;
438}
439
440static int mlx5_query_port_ib(struct ib_device *ibdev, u8 port,
441			      struct ib_port_attr *props)
442{
443	struct mlx5_ib_dev *dev = to_mdev(ibdev);
444	struct mlx5_core_dev *mdev = dev->mdev;
445	u32 *rep;
446	int outlen = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
447	struct mlx5_ptys_reg *ptys;
448	struct mlx5_pmtu_reg *pmtu;
449	struct mlx5_pvlc_reg pvlc;
450	void *ctx;
451	int err;
452
453	rep = mlx5_vzalloc(outlen);
454	ptys = kzalloc(sizeof(*ptys), GFP_KERNEL);
455	pmtu = kzalloc(sizeof(*pmtu), GFP_KERNEL);
456	if (!rep || !ptys || !pmtu) {
457		err = -ENOMEM;
458		goto out;
459	}
460
461	memset(props, 0, sizeof(*props));
462
	/* TODO: what if this is a PF with a dual port? */
464	err = mlx5_query_hca_vport_context(mdev, port, 0, rep, outlen);
465	if (err)
466		goto out;
467
468	ctx = MLX5_ADDR_OF(query_hca_vport_context_out, rep, hca_vport_context);
469
470	props->lid		= MLX5_GET(hca_vport_context, ctx, lid);
471	props->lmc		= MLX5_GET(hca_vport_context, ctx, lmc);
472	props->sm_lid		= MLX5_GET(hca_vport_context, ctx, sm_lid);
473	props->sm_sl		= MLX5_GET(hca_vport_context, ctx, sm_sl);
474	props->state		= MLX5_GET(hca_vport_context, ctx, vport_state);
475	props->phys_state	= MLX5_GET(hca_vport_context, ctx,
476					port_physical_state);
477	props->port_cap_flags	= MLX5_GET(hca_vport_context, ctx, cap_mask1);
478	props->gid_tbl_len	= mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
479	props->max_msg_sz	= 1 << MLX5_CAP_GEN(mdev, log_max_msg);
480	props->pkey_tbl_len	= mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
481	props->bad_pkey_cntr	= MLX5_GET(hca_vport_context, ctx,
482					      pkey_violation_counter);
483	props->qkey_viol_cntr	= MLX5_GET(hca_vport_context, ctx,
484					      qkey_violation_counter);
485	props->subnet_timeout	= MLX5_GET(hca_vport_context, ctx,
486					      subnet_timeout);
487	props->init_type_reply	= MLX5_GET(hca_vport_context, ctx,
488					   init_type_reply);
489
490	ptys->proto_mask |= MLX5_PTYS_IB;
491	ptys->local_port = port;
492	err = mlx5_core_access_ptys(mdev, ptys, 0);
493	if (err)
494		goto out;
495
496	err = translate_active_width(ibdev, ptys->ib_link_width_oper,
497				     &props->active_width);
498	if (err)
499		goto out;
500
501	props->active_speed	= (u8)ptys->ib_proto_oper;
502
503	pmtu->local_port = port;
504	err = mlx5_core_access_pmtu(mdev, pmtu, 0);
505	if (err)
506		goto out;
507
508	props->max_mtu		= pmtu->max_mtu;
509	props->active_mtu	= pmtu->oper_mtu;
510
511	memset(&pvlc, 0, sizeof(pvlc));
512	pvlc.local_port = port;
513	err = mlx5_core_access_pvlc(mdev, &pvlc, 0);
514	if (err)
515		goto out;
516
517	err = translate_max_vl_num(ibdev, pvlc.vl_hw_cap,
518				   &props->max_vl_num);
519out:
520	kvfree(rep);
521	kfree(ptys);
522	kfree(pmtu);
523	return err;
524}
525
526int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
527		       struct ib_port_attr *props)
528{
529	switch (mlx5_get_vport_access_method(ibdev)) {
530	case MLX5_VPORT_ACCESS_METHOD_MAD:
531		return mlx5_query_port_mad_ifc(ibdev, port, props);
532
533	case MLX5_VPORT_ACCESS_METHOD_HCA:
534		return mlx5_query_port_ib(ibdev, port, props);
535
536	case MLX5_VPORT_ACCESS_METHOD_NIC:
537		return mlx5_query_port_roce(ibdev, port, props);
538
539	default:
540		return -EINVAL;
541	}
542}
543
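/*
 * Build a modified EUI-64 interface identifier from the netdev's MAC
 * address: OUI in bytes 0-2, the NIC-specific part in bytes 5-7, the
 * VLAN id (or 0xFFFE when none) in bytes 3-4, and the universal/local
 * bit inverted.
 */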
544static void
545mlx5_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
546{
547	if (dev->if_addrlen != ETH_ALEN)
548		return;
549
550	memcpy(eui, IF_LLADDR(dev), 3);
551	memcpy(eui + 5, IF_LLADDR(dev) + 3, 3);
552
553	if (vlan_id < 0x1000) {
554		eui[3] = vlan_id >> 8;
555		eui[4] = vlan_id & 0xff;
556	} else {
557		eui[3] = 0xFF;
558		eui[4] = 0xFE;
559	}
560	eui[0] ^= 2;
561}
562
563static void
564mlx5_make_default_gid(struct net_device *dev, union ib_gid *gid)
565{
566	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
567	mlx5_addrconf_ifid_eui48(&gid->raw[8], 0xFFFF, dev);
568}
569
570static void
571mlx5_ib_roce_port_update(void *arg)
572{
573	struct mlx5_ib_port *port = (struct mlx5_ib_port *)arg;
574	struct mlx5_ib_dev *dev = port->dev;
575	struct mlx5_core_dev *mdev = dev->mdev;
576	struct net_device *xdev[MLX5_IB_GID_MAX];
577	struct net_device *idev;
578	struct net_device *ndev;
579	union ib_gid gid_temp;
580
581	while (port->port_gone == 0) {
582		int update = 0;
583		int gid_index = 0;
584		int j;
585		int error;
586
587		ndev = mlx5_get_protocol_dev(mdev, MLX5_INTERFACE_PROTOCOL_ETH);
588		if (ndev == NULL) {
589			pause("W", hz);
590			continue;
591		}
592
593		CURVNET_SET_QUIET(ndev->if_vnet);
594
595		memset(&gid_temp, 0, sizeof(gid_temp));
596		mlx5_make_default_gid(ndev, &gid_temp);
597		if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
598			port->gid_table[gid_index] = gid_temp;
599			update = 1;
600		}
601		xdev[gid_index] = ndev;
602		gid_index++;
603
604		IFNET_RLOCK();
605		TAILQ_FOREACH(idev, &V_ifnet, if_link) {
606			if (idev == ndev)
607				break;
608		}
609		if (idev != NULL) {
610		    TAILQ_FOREACH(idev, &V_ifnet, if_link) {
611			u16 vid;
612
613			if (idev != ndev) {
614				if (idev->if_type != IFT_L2VLAN)
615					continue;
616				if (ndev != rdma_vlan_dev_real_dev(idev))
617					continue;
618			}
619
620			/* setup valid MAC-based GID */
621			memset(&gid_temp, 0, sizeof(gid_temp));
622			gid_temp.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
623			vid = rdma_vlan_dev_vlan_id(idev);
624			mlx5_addrconf_ifid_eui48(&gid_temp.raw[8], vid, idev);
625
626			/* check for existing entry */
627			for (j = 0; j != gid_index; j++) {
628				if (bcmp(&gid_temp, &port->gid_table[j], sizeof(gid_temp)) == 0)
629					break;
630			}
631
632			/* check if new entry should be added */
633			if (j == gid_index && gid_index < MLX5_IB_GID_MAX) {
634				if (bcmp(&gid_temp, &port->gid_table[gid_index], sizeof(gid_temp))) {
635					port->gid_table[gid_index] = gid_temp;
636					update = 1;
637				}
638				xdev[gid_index] = idev;
639				gid_index++;
640			}
641		    }
642		}
643		IFNET_RUNLOCK();
644		CURVNET_RESTORE();
645
646		if (update != 0 &&
647		    mlx5_ib_port_link_layer(&dev->ib_dev, 1) == IB_LINK_LAYER_ETHERNET) {
648			struct ib_event event = {
649			    .device = &dev->ib_dev,
650			    .element.port_num = port->port_num + 1,
651			    .event = IB_EVENT_GID_CHANGE,
652			};
653
654			/* add new entries, if any */
655			for (j = 0; j != gid_index; j++) {
656				error = modify_gid_roce(&dev->ib_dev, port->port_num, j,
657				    port->gid_table + j, xdev[j]);
658				if (error != 0)
659					printf("mlx5_ib: Failed to update ROCE GID table: %d\n", error);
660			}
661			memset(&gid_temp, 0, sizeof(gid_temp));
662
663			/* clear old entries, if any */
664			for (; j != MLX5_IB_GID_MAX; j++) {
665				if (bcmp(&gid_temp, port->gid_table + j, sizeof(gid_temp)) == 0)
666					continue;
667				port->gid_table[j] = gid_temp;
668				(void) modify_gid_roce(&dev->ib_dev, port->port_num, j,
669				    port->gid_table + j, ndev);
670			}
671
672			/* make sure ibcore gets updated */
673			ib_dispatch_event(&event);
674		}
675		pause("W", hz);
676	}
677	do {
678		struct ib_event event = {
679			.device = &dev->ib_dev,
680			.element.port_num = port->port_num + 1,
681			.event = IB_EVENT_GID_CHANGE,
682		};
683		/* make sure ibcore gets updated */
684		ib_dispatch_event(&event);
685
686		/* wait a bit */
687		pause("W", hz);
688	} while (0);
689	port->port_gone = 2;
690	kthread_exit();
691}
692
693static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
694			     union ib_gid *gid)
695{
696	struct mlx5_ib_dev *dev = to_mdev(ibdev);
697	struct mlx5_core_dev *mdev = dev->mdev;
698
699	switch (mlx5_get_vport_access_method(ibdev)) {
700	case MLX5_VPORT_ACCESS_METHOD_MAD:
701		return mlx5_query_gids_mad_ifc(ibdev, port, index, gid);
702
703	case MLX5_VPORT_ACCESS_METHOD_HCA:
704		return mlx5_query_hca_vport_gid(mdev, port, 0, index, gid);
705
706	case MLX5_VPORT_ACCESS_METHOD_NIC:
707		if (port == 0 || port > MLX5_CAP_GEN(mdev, num_ports) ||
708		    index < 0 || index >= MLX5_IB_GID_MAX ||
709		    dev->port[port - 1].port_gone != 0)
710			memset(gid, 0, sizeof(*gid));
711		else
712			*gid = dev->port[port - 1].gid_table[index];
713		return 0;
714
715	default:
716		return -EINVAL;
717	}
718}
719
720static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
721			      u16 *pkey)
722{
723	struct mlx5_ib_dev *dev = to_mdev(ibdev);
724	struct mlx5_core_dev *mdev = dev->mdev;
725
726	switch (mlx5_get_vport_access_method(ibdev)) {
727	case MLX5_VPORT_ACCESS_METHOD_MAD:
728		return mlx5_query_pkey_mad_ifc(ibdev, port, index, pkey);
729
730	case MLX5_VPORT_ACCESS_METHOD_HCA:
731	case MLX5_VPORT_ACCESS_METHOD_NIC:
732		return mlx5_query_hca_vport_pkey(mdev, 0, port, 0, index,
733						 pkey);
734
735	default:
736		return -EINVAL;
737	}
738}
739
740static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
741				 struct ib_device_modify *props)
742{
743	struct mlx5_ib_dev *dev = to_mdev(ibdev);
744	struct mlx5_reg_node_desc in;
745	struct mlx5_reg_node_desc out;
746	int err;
747
748	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
749		return -EOPNOTSUPP;
750
751	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
752		return 0;
753
	/*
	 * If possible, pass the node desc to FW so that it can generate
	 * a trap 144 (local changes) event.  If the command fails, just
	 * ignore the error.
	 */
758	memcpy(&in, props->node_desc, 64);
759	err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
760				   sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
761	if (err)
762		return err;
763
764	memcpy(ibdev->node_desc, props->node_desc, 64);
765
766	return err;
767}
768
769static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
770			       struct ib_port_modify *props)
771{
772	u8 is_eth = (mlx5_ib_port_link_layer(ibdev, port) ==
773		     IB_LINK_LAYER_ETHERNET);
774	struct mlx5_ib_dev *dev = to_mdev(ibdev);
775	struct ib_port_attr attr;
776	u32 tmp;
777	int err;
778
779	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
780	 * of whether port link layer is ETH or IB. For ETH ports, qkey
781	 * violations and port capabilities are not valid.
782	 */
783	if (is_eth)
784		return 0;
785
786	mutex_lock(&dev->cap_mask_mutex);
787
788	err = mlx5_ib_query_port(ibdev, port, &attr);
789	if (err)
790		goto out;
791
792	tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
793		~props->clr_port_cap_mask;
794
795	err = mlx5_set_port_caps(dev->mdev, port, tmp);
796
797out:
798	mutex_unlock(&dev->cap_mask_mutex);
799	return err;
800}
801
802enum mlx5_cap_flags {
803	MLX5_CAP_COMPACT_AV = 1 << 0,
804};
805
806static void set_mlx5_flags(u32 *flags, struct mlx5_core_dev *dev)
807{
808	*flags |= MLX5_CAP_GEN(dev, compact_address_vector) ?
809		  MLX5_CAP_COMPACT_AV : 0;
810}
811
812static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
813						  struct ib_udata *udata)
814{
815	struct mlx5_ib_dev *dev = to_mdev(ibdev);
816	struct mlx5_ib_alloc_ucontext_req_v2 req;
817	struct mlx5_ib_alloc_ucontext_resp resp;
818	struct mlx5_ib_ucontext *context;
819	struct mlx5_uuar_info *uuari;
820	struct mlx5_uar *uars;
821	int gross_uuars;
822	int num_uars;
823	int ver;
824	int uuarn;
825	int err;
826	int i;
827	size_t reqlen;
828
829	if (!dev->ib_active)
830		return ERR_PTR(-EAGAIN);
831
832	memset(&req, 0, sizeof(req));
833	memset(&resp, 0, sizeof(resp));
834
835	reqlen = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
836	if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
837		ver = 0;
838	else if (reqlen == sizeof(struct mlx5_ib_alloc_ucontext_req_v2))
839		ver = 2;
840	else {
841		mlx5_ib_err(dev, "request malformed, reqlen: %ld\n", (long)reqlen);
842		return ERR_PTR(-EINVAL);
843	}
844
845	err = ib_copy_from_udata(&req, udata, reqlen);
846	if (err) {
847		mlx5_ib_err(dev, "copy failed\n");
848		return ERR_PTR(err);
849	}
850
851	if (req.reserved) {
852		mlx5_ib_err(dev, "request corrupted\n");
853		return ERR_PTR(-EINVAL);
854	}
855
856	if (req.total_num_uuars == 0 || req.total_num_uuars > MLX5_MAX_UUARS) {
857		mlx5_ib_warn(dev, "wrong num_uuars: %d\n", req.total_num_uuars);
858		return ERR_PTR(-ENOMEM);
859	}
860
861	req.total_num_uuars = ALIGN(req.total_num_uuars,
862				    MLX5_NON_FP_BF_REGS_PER_PAGE);
863	if (req.num_low_latency_uuars > req.total_num_uuars - 1) {
		mlx5_ib_warn(dev, "wrong num_low_latency_uuars: %d ( > %d)\n",
			     req.num_low_latency_uuars, req.total_num_uuars - 1);
866		return ERR_PTR(-EINVAL);
867	}
868
869	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
870	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
871	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
872	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
873		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
874	resp.cache_line_size = L1_CACHE_BYTES;
875	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
876	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
877	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
878	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
879	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
880	set_mlx5_flags(&resp.flags, dev->mdev);
881
882	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, max_desc_sz_sq_dc) < udata->outlen)
883		resp.max_desc_sz_sq_dc = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq_dc);
884
885	if (offsetof(struct mlx5_ib_alloc_ucontext_resp, atomic_arg_sizes_dc) < udata->outlen)
886		resp.atomic_arg_sizes_dc = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_dc);
887
888	context = kzalloc(sizeof(*context), GFP_KERNEL);
889	if (!context)
890		return ERR_PTR(-ENOMEM);
891
892	uuari = &context->uuari;
893	mutex_init(&uuari->lock);
894	uars = kcalloc(num_uars, sizeof(*uars), GFP_KERNEL);
895	if (!uars) {
896		err = -ENOMEM;
897		goto out_ctx;
898	}
899
900	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
901				sizeof(*uuari->bitmap),
902				GFP_KERNEL);
903	if (!uuari->bitmap) {
904		err = -ENOMEM;
905		goto out_uar_ctx;
906	}
	/*
	 * Reserve the fast path UUARs (the third and fourth register in
	 * each group of four) so they are not handed out as regular UUARs.
	 */
910	for (i = 0; i < gross_uuars; i++) {
911		uuarn = i & 3;
912		if (uuarn == 2 || uuarn == 3)
913			set_bit(i, uuari->bitmap);
914	}
915
916	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
917	if (!uuari->count) {
918		err = -ENOMEM;
919		goto out_bitmap;
920	}
921
922	for (i = 0; i < num_uars; i++) {
923		err = mlx5_cmd_alloc_uar(dev->mdev, &uars[i].index);
924		if (err) {
925			mlx5_ib_err(dev, "uar alloc failed at %d\n", i);
926			goto out_uars;
927		}
928	}
929	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++)
930		context->dynamic_wc_uar_index[i] = MLX5_IB_INVALID_UAR_INDEX;
931
932	INIT_LIST_HEAD(&context->db_page_list);
933	mutex_init(&context->db_page_mutex);
934
935	resp.tot_uuars = req.total_num_uuars;
936	resp.num_ports = MLX5_CAP_GEN(dev->mdev, num_ports);
937	err = ib_copy_to_udata(udata, &resp,
938			       min_t(size_t, udata->outlen, sizeof(resp)));
939	if (err)
940		goto out_uars;
941
942	uuari->ver = ver;
943	uuari->num_low_latency_uuars = req.num_low_latency_uuars;
944	uuari->uars = uars;
945	uuari->num_uars = num_uars;
946
947	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
948	    IB_LINK_LAYER_ETHERNET) {
949		err = mlx5_alloc_transport_domain(dev->mdev, &context->tdn);
950		if (err)
951			goto out_uars;
952	}
953
954	return &context->ibucontext;
955
956out_uars:
957	for (i--; i >= 0; i--)
958		mlx5_cmd_free_uar(dev->mdev, uars[i].index);
959	kfree(uuari->count);
960
961out_bitmap:
962	kfree(uuari->bitmap);
963
964out_uar_ctx:
965	kfree(uars);
966
967out_ctx:
968	kfree(context);
969	return ERR_PTR(err);
970}
971
972static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
973{
974	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
975	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
976	struct mlx5_uuar_info *uuari = &context->uuari;
977	int i;
978
979	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
980	    IB_LINK_LAYER_ETHERNET)
981		mlx5_dealloc_transport_domain(dev->mdev, context->tdn);
982
983	for (i = 0; i < uuari->num_uars; i++) {
984		if (mlx5_cmd_free_uar(dev->mdev, uuari->uars[i].index))
985			mlx5_ib_warn(dev, "failed to free UAR 0x%x\n", uuari->uars[i].index);
986	}
987	for (i = 0; i < MLX5_IB_MAX_CTX_DYNAMIC_UARS; i++) {
988		if (context->dynamic_wc_uar_index[i] != MLX5_IB_INVALID_UAR_INDEX)
989			mlx5_cmd_free_uar(dev->mdev, context->dynamic_wc_uar_index[i]);
990	}
991
992	kfree(uuari->count);
993	kfree(uuari->bitmap);
994	kfree(uuari->uars);
995	kfree(context);
996
997	return 0;
998}
999
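/* UAR pages live in BAR 0; translate a UAR index into a page frame number. */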
1000static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev, int index)
1001{
1002	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + index;
1003}
1004
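/*
 * The mmap page offset encodes a command in the bits above
 * MLX5_IB_MMAP_CMD_SHIFT and its argument (the UAR index) in the low
 * bits.
 */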
1005static int get_command(unsigned long offset)
1006{
1007	return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
1008}
1009
1010static int get_arg(unsigned long offset)
1011{
1012	return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
1013}
1014
1015static int get_index(unsigned long offset)
1016{
1017	return get_arg(offset);
1018}
1019
1020static int uar_mmap(struct vm_area_struct *vma, pgprot_t prot, bool is_wc,
1021		    struct mlx5_uuar_info *uuari, struct mlx5_ib_dev *dev,
1022		    struct mlx5_ib_ucontext *context)
1023{
1024	unsigned long idx;
1025	phys_addr_t pfn;
1026
1027	if (vma->vm_end - vma->vm_start != PAGE_SIZE) {
1028		mlx5_ib_warn(dev, "wrong size, expected PAGE_SIZE(%ld) got %ld\n",
1029			     (long)PAGE_SIZE, (long)(vma->vm_end - vma->vm_start));
1030		return -EINVAL;
1031	}
1032
1033	idx = get_index(vma->vm_pgoff);
1034	if (idx >= uuari->num_uars) {
1035		mlx5_ib_warn(dev, "wrong offset, idx:%ld num_uars:%d\n",
1036			     idx, uuari->num_uars);
1037		return -EINVAL;
1038	}
1039
1040	pfn = uar_index2pfn(dev, uuari->uars[idx].index);
1041	mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn 0x%llx\n", idx,
1042		    (unsigned long long)pfn);
1043
1044	vma->vm_page_prot = prot;
1045	if (io_remap_pfn_range(vma, vma->vm_start, pfn,
1046			       PAGE_SIZE, vma->vm_page_prot)) {
1047		mlx5_ib_err(dev, "io remap failed\n");
1048		return -EAGAIN;
1049	}
1050
1051	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA 0x%llx\n", is_wc ? "WC" : "NC",
1052		    (long)vma->vm_start, (unsigned long long)pfn << PAGE_SHIFT);
1053
1054	return 0;
1055}
1056
1057static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
1058{
1059	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1060	struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1061	struct mlx5_uuar_info *uuari = &context->uuari;
1062	unsigned long command;
1063
1064	command = get_command(vma->vm_pgoff);
	switch (command) {
	case MLX5_IB_MMAP_REGULAR_PAGE:
		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
				true, uuari, dev, context);

	case MLX5_IB_MMAP_WC_PAGE:
		return uar_mmap(vma, pgprot_writecombine(vma->vm_page_prot),
				true, uuari, dev, context);

	case MLX5_IB_MMAP_NC_PAGE:
		return uar_mmap(vma, pgprot_noncached(vma->vm_page_prot),
				false, uuari, dev, context);

	default:
		return -EINVAL;
	}
1086
1087	return 0;
1088}
1089
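/*
 * Allocate a physical-address (PA) mkey with local read access covering
 * the whole address space (MLX5_MKEY_LEN64); kernel PDs store it as
 * their pa_lkey.
 */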
1090static int alloc_pa_mkey(struct mlx5_ib_dev *dev, u32 *key, u32 pdn)
1091{
1092	struct mlx5_create_mkey_mbox_in *in;
1093	struct mlx5_mkey_seg *seg;
1094	struct mlx5_core_mr mr;
1095	int err;
1096
1097	in = kzalloc(sizeof(*in), GFP_KERNEL);
1098	if (!in)
1099		return -ENOMEM;
1100
1101	seg = &in->seg;
1102	seg->flags = MLX5_PERM_LOCAL_READ | MLX5_ACCESS_MODE_PA;
1103	seg->flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64);
1104	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
1105	seg->start_addr = 0;
1106
1107	err = mlx5_core_create_mkey(dev->mdev, &mr, in, sizeof(*in),
1108				    NULL, NULL, NULL);
1109	if (err) {
1110		mlx5_ib_warn(dev, "failed to create mkey, %d\n", err);
1111		goto err_in;
1112	}
1113
1114	kfree(in);
1115	*key = mr.key;
1116
1117	return 0;
1118
1119err_in:
1120	kfree(in);
1121
1122	return err;
1123}
1124
1125static void free_pa_mkey(struct mlx5_ib_dev *dev, u32 key)
1126{
1127	struct mlx5_core_mr mr;
1128	int err;
1129
1130	memset(&mr, 0, sizeof(mr));
1131	mr.key = key;
1132	err = mlx5_core_destroy_mkey(dev->mdev, &mr);
1133	if (err)
1134		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x\n", key);
1135}
1136
1137static struct ib_pd *mlx5_ib_alloc_pd(struct ib_device *ibdev,
1138				      struct ib_ucontext *context,
1139				      struct ib_udata *udata)
1140{
1141	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1142	struct mlx5_ib_alloc_pd_resp resp;
1143	struct mlx5_ib_pd *pd;
1144	int err;
1145
1146	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
1147	if (!pd)
1148		return ERR_PTR(-ENOMEM);
1149
1150	err = mlx5_core_alloc_pd(to_mdev(ibdev)->mdev, &pd->pdn);
1151	if (err) {
1152		mlx5_ib_warn(dev, "pd alloc failed\n");
1153		kfree(pd);
1154		return ERR_PTR(err);
1155	}
1156
1157	if (context) {
1158		resp.pdn = pd->pdn;
1159		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1160			mlx5_ib_err(dev, "copy failed\n");
1161			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1162			kfree(pd);
1163			return ERR_PTR(-EFAULT);
1164		}
1165	} else {
1166		err = alloc_pa_mkey(to_mdev(ibdev), &pd->pa_lkey, pd->pdn);
1167		if (err) {
1168			mlx5_ib_err(dev, "alloc mkey failed\n");
1169			mlx5_core_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn);
1170			kfree(pd);
1171			return ERR_PTR(err);
1172		}
1173	}
1174
1175	return &pd->ibpd;
1176}
1177
1178static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
1179{
1180	struct mlx5_ib_dev *mdev = to_mdev(pd->device);
1181	struct mlx5_ib_pd *mpd = to_mpd(pd);
1182
1183	if (!pd->uobject)
1184		free_pa_mkey(mdev, mpd->pa_lkey);
1185
1186	mlx5_core_dealloc_pd(mdev->mdev, mpd->pdn);
1187	kfree(mpd);
1188
1189	return 0;
1190}
1191
1192static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1193{
1194	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1195	int err;
1196
1197	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1198		err = -EOPNOTSUPP;
1199	else
1200		err = mlx5_core_attach_mcg(dev->mdev, gid, ibqp->qp_num);
1201	if (err)
1202		mlx5_ib_warn(dev, "failed attaching QPN 0x%x, MGID %pI6\n",
1203			     ibqp->qp_num, gid->raw);
1204
1205	return err;
1206}
1207
1208static int mlx5_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1209{
1210	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
1211	int err;
1212
1213	if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1214		err = -EOPNOTSUPP;
1215	else
1216		err = mlx5_core_detach_mcg(dev->mdev, gid, ibqp->qp_num);
1217	if (err)
1218		mlx5_ib_warn(dev, "failed detaching QPN 0x%x, MGID %pI6\n",
1219			     ibqp->qp_num, gid->raw);
1220
1221	return err;
1222}
1223
1224static int init_node_data(struct mlx5_ib_dev *dev)
1225{
1226	int err;
1227
1228	err = mlx5_query_node_desc(dev, dev->ib_dev.node_desc);
1229	if (err)
1230		return err;
1231
1232	return mlx5_query_node_guid(dev, &dev->ib_dev.node_guid);
1233}
1234
1235static ssize_t show_fw_pages(struct device *device, struct device_attribute *attr,
1236			     char *buf)
1237{
1238	struct mlx5_ib_dev *dev =
1239		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1240
1241	return sprintf(buf, "%lld\n", (long long)dev->mdev->priv.fw_pages);
1242}
1243
1244static ssize_t show_reg_pages(struct device *device,
1245			      struct device_attribute *attr, char *buf)
1246{
1247	struct mlx5_ib_dev *dev =
1248		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1249
1250	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
1251}
1252
1253static ssize_t show_hca(struct device *device, struct device_attribute *attr,
1254			char *buf)
1255{
1256	struct mlx5_ib_dev *dev =
1257		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1258	return sprintf(buf, "MT%d\n", dev->mdev->pdev->device);
1259}
1260
1261static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
1262			   char *buf)
1263{
1264	struct mlx5_ib_dev *dev =
1265		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1266	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
1267		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
1268}
1269
1270static ssize_t show_rev(struct device *device, struct device_attribute *attr,
1271			char *buf)
1272{
1273	struct mlx5_ib_dev *dev =
1274		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1275	return sprintf(buf, "%x\n", (unsigned)dev->mdev->pdev->revision);
1276}
1277
1278static ssize_t show_board(struct device *device, struct device_attribute *attr,
1279			  char *buf)
1280{
1281	struct mlx5_ib_dev *dev =
1282		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
1283	return sprintf(buf, "%.*s\n", MLX5_BOARD_ID_LEN,
1284		       dev->mdev->board_id);
1285}
1286
1287static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
1288static DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
1289static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
1290static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
1291static DEVICE_ATTR(fw_pages, S_IRUGO, show_fw_pages, NULL);
1292static DEVICE_ATTR(reg_pages, S_IRUGO, show_reg_pages, NULL);
1293
1294static struct device_attribute *mlx5_class_attributes[] = {
1295	&dev_attr_hw_rev,
1296	&dev_attr_fw_ver,
1297	&dev_attr_hca_type,
1298	&dev_attr_board_id,
1299	&dev_attr_fw_pages,
1300	&dev_attr_reg_pages,
1301};
1302
1303static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
1304{
1305	struct mlx5_ib_qp *mqp;
1306	struct mlx5_ib_cq *send_mcq, *recv_mcq;
1307	struct mlx5_core_cq *mcq;
1308	struct list_head cq_armed_list;
1309	unsigned long flags_qp;
1310	unsigned long flags_cq;
1311	unsigned long flags;
1312
1313	mlx5_ib_warn(ibdev, " started\n");
1314	INIT_LIST_HEAD(&cq_armed_list);
1315
	/* Go over the QP list on this ibdev; synchronize with QP create/destroy. */
1317	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
1318	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
1319		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
1320		if (mqp->sq.tail != mqp->sq.head) {
1321			send_mcq = to_mcq(mqp->ibqp.send_cq);
1322			spin_lock_irqsave(&send_mcq->lock, flags_cq);
1323			if (send_mcq->mcq.comp &&
1324			    mqp->ibqp.send_cq->comp_handler) {
1325				if (!send_mcq->mcq.reset_notify_added) {
1326					send_mcq->mcq.reset_notify_added = 1;
1327					list_add_tail(&send_mcq->mcq.reset_notify,
1328						      &cq_armed_list);
1329				}
1330			}
1331			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
1332		}
1333		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
1334		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
1335		/* no handling is needed for SRQ */
1336		if (!mqp->ibqp.srq) {
1337			if (mqp->rq.tail != mqp->rq.head) {
1338				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
1339				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
1340				if (recv_mcq->mcq.comp &&
1341				    mqp->ibqp.recv_cq->comp_handler) {
1342					if (!recv_mcq->mcq.reset_notify_added) {
1343						recv_mcq->mcq.reset_notify_added = 1;
1344						list_add_tail(&recv_mcq->mcq.reset_notify,
1345							      &cq_armed_list);
1346					}
1347				}
1348				spin_unlock_irqrestore(&recv_mcq->lock,
1349						       flags_cq);
1350			}
1351		}
1352		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
1353	}
	/*
	 * At this point all in-flight post-send work has been flushed, as
	 * guaranteed by the lock/unlock of the locks above.  Now arm all
	 * involved CQs.
	 */
1357	list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
1358		mcq->comp(mcq);
1359	}
1360	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
1361	mlx5_ib_warn(ibdev, " ended\n");
1362	return;
1363}
1364
1365static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
1366			  enum mlx5_dev_event event, unsigned long param)
1367{
1368	struct mlx5_ib_dev *ibdev = (struct mlx5_ib_dev *)context;
1369	struct ib_event ibev;
1370
1371	u8 port = 0;
1372
1373	switch (event) {
1374	case MLX5_DEV_EVENT_SYS_ERROR:
1375		ibdev->ib_active = false;
1376		ibev.event = IB_EVENT_DEVICE_FATAL;
1377		mlx5_ib_handle_internal_error(ibdev);
1378		break;
1379
1380	case MLX5_DEV_EVENT_PORT_UP:
1381		ibev.event = IB_EVENT_PORT_ACTIVE;
1382		port = (u8)param;
1383		break;
1384
1385	case MLX5_DEV_EVENT_PORT_DOWN:
1386	case MLX5_DEV_EVENT_PORT_INITIALIZED:
1387		ibev.event = IB_EVENT_PORT_ERR;
1388		port = (u8)param;
1389		break;
1390
1391	case MLX5_DEV_EVENT_LID_CHANGE:
1392		ibev.event = IB_EVENT_LID_CHANGE;
1393		port = (u8)param;
1394		break;
1395
1396	case MLX5_DEV_EVENT_PKEY_CHANGE:
1397		ibev.event = IB_EVENT_PKEY_CHANGE;
1398		port = (u8)param;
1399		break;
1400
1401	case MLX5_DEV_EVENT_GUID_CHANGE:
1402		ibev.event = IB_EVENT_GID_CHANGE;
1403		port = (u8)param;
1404		break;
1405
1406	case MLX5_DEV_EVENT_CLIENT_REREG:
1407		ibev.event = IB_EVENT_CLIENT_REREGISTER;
1408		port = (u8)param;
1409		break;
1410
1411	default:
1412		break;
1413	}
1414
1415	ibev.device	      = &ibdev->ib_dev;
1416	ibev.element.port_num = port;
1417
1418	if ((event != MLX5_DEV_EVENT_SYS_ERROR) &&
1419	    (port < 1 || port > ibdev->num_ports)) {
1420		mlx5_ib_warn(ibdev, "warning: event on port %d\n", port);
1421		return;
1422	}
1423
1424	if (ibdev->ib_active)
1425		ib_dispatch_event(&ibev);
1426}
1427
1428static void get_ext_port_caps(struct mlx5_ib_dev *dev)
1429{
1430	int port;
1431
1432	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++)
1433		mlx5_query_ext_port_caps(dev, port);
1434}
1435
1436static void config_atomic_responder(struct mlx5_ib_dev *dev,
1437				    struct ib_device_attr *props)
1438{
1439	enum ib_atomic_cap cap = props->atomic_cap;
1440
1441#if 0
1442	if (cap == IB_ATOMIC_HCA ||
1443	    cap == IB_ATOMIC_GLOB)
1444#endif
1445		dev->enable_atomic_resp = 1;
1446
1447	dev->atomic_cap = cap;
1448}
1449
1450enum mlx5_addr_align {
1451	MLX5_ADDR_ALIGN_0	= 0,
1452	MLX5_ADDR_ALIGN_64	= 64,
1453	MLX5_ADDR_ALIGN_128	= 128,
1454};
1455
1456static int get_port_caps(struct mlx5_ib_dev *dev)
1457{
1458	struct ib_device_attr *dprops = NULL;
1459	struct ib_port_attr *pprops = NULL;
1460	int err = -ENOMEM;
1461	int port;
1462
1463	pprops = kmalloc(sizeof(*pprops), GFP_KERNEL);
1464	if (!pprops)
1465		goto out;
1466
1467	dprops = kmalloc(sizeof(*dprops), GFP_KERNEL);
1468	if (!dprops)
1469		goto out;
1470
1471	err = mlx5_ib_query_device(&dev->ib_dev, dprops);
1472	if (err) {
1473		mlx5_ib_warn(dev, "query_device failed %d\n", err);
1474		goto out;
1475	}
1476	config_atomic_responder(dev, dprops);
1477
1478	for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
1479		err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
1480		if (err) {
1481			mlx5_ib_warn(dev, "query_port %d failed %d\n",
1482				     port, err);
1483			break;
1484		}
1485		dev->mdev->port_caps[port - 1].pkey_table_len = dprops->max_pkeys;
1486		dev->mdev->port_caps[port - 1].gid_table_len = pprops->gid_tbl_len;
1487		mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n",
1488			    dprops->max_pkeys, pprops->gid_tbl_len);
1489	}
1490
1491out:
1492	kfree(pprops);
1493	kfree(dprops);
1494
1495	return err;
1496}
1497
1498static void destroy_umrc_res(struct mlx5_ib_dev *dev)
1499{
1500	int err;
1501
1502	err = mlx5_mr_cache_cleanup(dev);
1503	if (err)
1504		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
1505
1506	ib_dereg_mr(dev->umrc.mr);
1507	ib_dealloc_pd(dev->umrc.pd);
1508}
1509
1510enum {
1511	MAX_UMR_WR = 128,
1512};
1513
1514static int create_umr_res(struct mlx5_ib_dev *dev)
1515{
1516	struct ib_pd *pd;
1517	struct ib_mr *mr;
1518	int ret;
1519
1520	pd = ib_alloc_pd(&dev->ib_dev);
1521	if (IS_ERR(pd)) {
1522		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
1523		ret = PTR_ERR(pd);
1524		goto error_0;
1525	}
1526
1527	mr = ib_get_dma_mr(pd,  IB_ACCESS_LOCAL_WRITE);
1528	if (IS_ERR(mr)) {
1529		mlx5_ib_dbg(dev, "Couldn't create DMA MR for sync UMR QP\n");
1530		ret = PTR_ERR(mr);
1531		goto error_1;
1532	}
1533
1534	dev->umrc.mr = mr;
1535	dev->umrc.pd = pd;
1536
1537	ret = mlx5_mr_cache_init(dev);
1538	if (ret) {
1539		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
1540		goto error_4;
1541	}
1542
1543	return 0;
1544
1545error_4:
1546	ib_dereg_mr(mr);
1547error_1:
1548	ib_dealloc_pd(pd);
1549error_0:
1550	return ret;
1551}
1552
1553static int create_dev_resources(struct mlx5_ib_resources *devr)
1554{
1555	struct ib_srq_init_attr attr;
1556	struct mlx5_ib_dev *dev;
1557	int ret = 0;
1558
1559	dev = container_of(devr, struct mlx5_ib_dev, devr);
1560
1561	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
1562	if (IS_ERR(devr->p0)) {
1563		ret = PTR_ERR(devr->p0);
1564		goto error0;
1565	}
1566	devr->p0->device  = &dev->ib_dev;
1567	devr->p0->uobject = NULL;
1568	atomic_set(&devr->p0->usecnt, 0);
1569
1570	devr->c0 = mlx5_ib_create_cq(&dev->ib_dev, 1, 0, NULL, NULL);
1571	if (IS_ERR(devr->c0)) {
1572		ret = PTR_ERR(devr->c0);
1573		goto error1;
1574	}
1575	devr->c0->device        = &dev->ib_dev;
1576	devr->c0->uobject       = NULL;
1577	devr->c0->comp_handler  = NULL;
1578	devr->c0->event_handler = NULL;
1579	devr->c0->cq_context    = NULL;
1580	atomic_set(&devr->c0->usecnt, 0);
1581
1582	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1583	if (IS_ERR(devr->x0)) {
1584		ret = PTR_ERR(devr->x0);
1585		goto error2;
1586	}
1587	devr->x0->device = &dev->ib_dev;
1588	devr->x0->inode = NULL;
1589	atomic_set(&devr->x0->usecnt, 0);
1590	mutex_init(&devr->x0->tgt_qp_mutex);
1591	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
1592
1593	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL, NULL);
1594	if (IS_ERR(devr->x1)) {
1595		ret = PTR_ERR(devr->x1);
1596		goto error3;
1597	}
1598	devr->x1->device = &dev->ib_dev;
1599	devr->x1->inode = NULL;
1600	atomic_set(&devr->x1->usecnt, 0);
1601	mutex_init(&devr->x1->tgt_qp_mutex);
1602	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
1603
1604	memset(&attr, 0, sizeof(attr));
1605	attr.attr.max_sge = 1;
1606	attr.attr.max_wr = 1;
1607	attr.srq_type = IB_SRQT_XRC;
1608	attr.ext.xrc.cq = devr->c0;
1609	attr.ext.xrc.xrcd = devr->x0;
1610
1611	devr->s0 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1612	if (IS_ERR(devr->s0)) {
1613		ret = PTR_ERR(devr->s0);
1614		goto error4;
1615	}
1616	devr->s0->device	= &dev->ib_dev;
1617	devr->s0->pd		= devr->p0;
1618	devr->s0->uobject       = NULL;
1619	devr->s0->event_handler = NULL;
1620	devr->s0->srq_context   = NULL;
1621	devr->s0->srq_type      = IB_SRQT_XRC;
1622	devr->s0->ext.xrc.xrcd  = devr->x0;
1623	devr->s0->ext.xrc.cq	= devr->c0;
1624	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
1625	atomic_inc(&devr->s0->ext.xrc.cq->usecnt);
1626	atomic_inc(&devr->p0->usecnt);
1627	atomic_set(&devr->s0->usecnt, 0);
1628
1629	memset(&attr, 0, sizeof(attr));
1630	attr.attr.max_sge = 1;
1631	attr.attr.max_wr = 1;
1632	attr.srq_type = IB_SRQT_BASIC;
1633	devr->s1 = mlx5_ib_create_srq(devr->p0, &attr, NULL);
1634	if (IS_ERR(devr->s1)) {
1635		ret = PTR_ERR(devr->s1);
1636		goto error5;
1637	}
1638	devr->s1->device	= &dev->ib_dev;
1639	devr->s1->pd		= devr->p0;
1640	devr->s1->uobject       = NULL;
1641	devr->s1->event_handler = NULL;
1642	devr->s1->srq_context   = NULL;
1643	devr->s1->srq_type      = IB_SRQT_BASIC;
1644	devr->s1->ext.xrc.cq	= devr->c0;
1645	atomic_inc(&devr->p0->usecnt);
1646	atomic_set(&devr->s1->usecnt, 0);
1647
1648	return 0;
1649
1650error5:
1651	mlx5_ib_destroy_srq(devr->s0);
1652error4:
1653	mlx5_ib_dealloc_xrcd(devr->x1);
1654error3:
1655	mlx5_ib_dealloc_xrcd(devr->x0);
1656error2:
1657	mlx5_ib_destroy_cq(devr->c0);
1658error1:
1659	mlx5_ib_dealloc_pd(devr->p0);
1660error0:
1661	return ret;
1662}
1663
1664static void destroy_dev_resources(struct mlx5_ib_resources *devr)
1665{
1666	mlx5_ib_destroy_srq(devr->s1);
1667	mlx5_ib_destroy_srq(devr->s0);
1668	mlx5_ib_dealloc_xrcd(devr->x0);
1669	mlx5_ib_dealloc_xrcd(devr->x1);
1670	mlx5_ib_destroy_cq(devr->c0);
1671	mlx5_ib_dealloc_pd(devr->p0);
1672}
1673
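/*
 * IB ports expose the full IBA core capabilities.  Ethernet ports are
 * always raw-packet capable; RoCE v1/v2 flags are added only when the
 * device supports both the IPv4 and IPv6 GID types.
 */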
1674static u32 get_core_cap_flags(struct ib_device *ibdev)
1675{
1676	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1677	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
1678	u8 l3_type_cap = MLX5_CAP_ROCE(dev->mdev, l3_type);
1679	u8 roce_version_cap = MLX5_CAP_ROCE(dev->mdev, roce_version);
1680	u32 ret = 0;
1681
1682	if (ll == IB_LINK_LAYER_INFINIBAND)
1683		return RDMA_CORE_PORT_IBA_IB;
1684
1685	ret = RDMA_CORE_PORT_RAW_PACKET;
1686
1687	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
1688		return ret;
1689
1690	if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
1691		return ret;
1692
1693	if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
1694		ret |= RDMA_CORE_PORT_IBA_ROCE;
1695
1696	if (roce_version_cap & MLX5_ROCE_VERSION_2_CAP)
1697		ret |= RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
1698
1699	return ret;
1700}
1701
1702static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
1703			       struct ib_port_immutable *immutable)
1704{
1705	struct ib_port_attr attr;
1706	struct mlx5_ib_dev *dev = to_mdev(ibdev);
1707	enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
1708	int err;
1709
1710	immutable->core_cap_flags = get_core_cap_flags(ibdev);
1711
1712	err = ib_query_port(ibdev, port_num, &attr);
1713	if (err)
1714		return err;
1715
1716	immutable->pkey_tbl_len = attr.pkey_tbl_len;
1717	immutable->gid_tbl_len = attr.gid_tbl_len;
1718	immutable->core_cap_flags = get_core_cap_flags(ibdev);
1719	if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
1720		immutable->max_mad_size = IB_MGMT_MAD_SIZE;
1721
1722	return 0;
1723}
1724
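/*
 * The DC CNAK tracer hands firmware a DMA buffer (4 KB per port) into
 * which it records DC CNAK trace events.
 */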
1725static void enable_dc_tracer(struct mlx5_ib_dev *dev)
1726{
1727	struct device *device = dev->ib_dev.dma_device;
1728	struct mlx5_dc_tracer *dct = &dev->dctr;
1729	int order;
1730	void *tmp;
1731	int size;
1732	int err;
1733
1734	size = MLX5_CAP_GEN(dev->mdev, num_ports) * 4096;
1735	if (size <= PAGE_SIZE)
1736		order = 0;
1737	else
1738		order = 1;
1739
1740	dct->pg = alloc_pages(GFP_KERNEL, order);
1741	if (!dct->pg) {
		mlx5_ib_err(dev, "failed to allocate order %d pages\n", order);
1743		return;
1744	}
1745
1746	tmp = page_address(dct->pg);
1747	memset(tmp, 0xff, size);
1748
1749	dct->size = size;
1750	dct->order = order;
1751	dct->dma = dma_map_page(device, dct->pg, 0, size, DMA_FROM_DEVICE);
1752	if (dma_mapping_error(device, dct->dma)) {
1753		mlx5_ib_err(dev, "dma mapping error\n");
1754		goto map_err;
1755	}
1756
1757	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 1, dct->dma);
1758	if (err) {
1759		mlx5_ib_warn(dev, "failed to enable DC tracer\n");
1760		goto cmd_err;
1761	}
1762
1763	return;
1764
1765cmd_err:
1766	dma_unmap_page(device, dct->dma, size, DMA_FROM_DEVICE);
1767map_err:
1768	__free_pages(dct->pg, dct->order);
1769	dct->pg = NULL;
1770}
1771
1772static void disable_dc_tracer(struct mlx5_ib_dev *dev)
1773{
1774	struct device *device = dev->ib_dev.dma_device;
1775	struct mlx5_dc_tracer *dct = &dev->dctr;
1776	int err;
1777
1778	if (!dct->pg)
1779		return;
1780
1781	err = mlx5_core_set_dc_cnak_trace(dev->mdev, 0, dct->dma);
1782	if (err) {
1783		mlx5_ib_warn(dev, "failed to disable DC tracer\n");
1784		return;
1785	}
1786
1787	dma_unmap_page(device, dct->dma, dct->size, DMA_FROM_DEVICE);
1788	__free_pages(dct->pg, dct->order);
1789	dct->pg = NULL;
1790}
1791
1792enum {
1793	MLX5_DC_CNAK_SIZE		= 128,
1794	MLX5_NUM_BUF_IN_PAGE		= PAGE_SIZE / MLX5_DC_CNAK_SIZE,
1795	MLX5_CNAK_TX_CQ_SIGNAL_FACTOR	= 128,
1796	MLX5_DC_CNAK_SL			= 0,
1797	MLX5_DC_CNAK_VL			= 0,
1798};
1799
1800static int init_dc_improvements(struct mlx5_ib_dev *dev)
1801{
1802	if (!mlx5_core_is_pf(dev->mdev))
1803		return 0;
1804
1805	if (!(MLX5_CAP_GEN(dev->mdev, dc_cnak_trace)))
1806		return 0;
1807
1808	enable_dc_tracer(dev);
1809
1810	return 0;
1811}
1812
1813static void cleanup_dc_improvements(struct mlx5_ib_dev *dev)
1814{
1815
1816	disable_dc_tracer(dev);
1817}
1818
1819static void mlx5_ib_dealloc_q_port_counter(struct mlx5_ib_dev *dev, u8 port_num)
1820{
1821	mlx5_vport_dealloc_q_counter(dev->mdev,
1822				     MLX5_INTERFACE_PROTOCOL_IB,
1823				     dev->port[port_num].q_cnt_id);
1824	dev->port[port_num].q_cnt_id = 0;
1825}
1826
1827static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
1828{
1829	unsigned int i;
1830
1831	for (i = 0; i < dev->num_ports; i++)
1832		mlx5_ib_dealloc_q_port_counter(dev, i);
1833}
1834
1835static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
1836{
1837	int i;
1838	int ret;
1839
1840	for (i = 0; i < dev->num_ports; i++) {
1841		ret = mlx5_vport_alloc_q_counter(dev->mdev,
1842						 MLX5_INTERFACE_PROTOCOL_IB,
1843						 &dev->port[i].q_cnt_id);
1844		if (ret) {
1845			mlx5_ib_warn(dev,
1846				     "couldn't allocate queue counter for port %d\n",
1847				     i + 1);
1848			goto dealloc_counters;
1849		}
1850	}
1851
1852	return 0;
1853
1854dealloc_counters:
1855	while (--i >= 0)
1856		mlx5_ib_dealloc_q_port_counter(dev, i);
1857
1858	return ret;
1859}
1860
1861struct port_attribute {
1862	struct attribute attr;
1863	ssize_t (*show)(struct mlx5_ib_port *,
1864			struct port_attribute *, char *buf);
1865	ssize_t (*store)(struct mlx5_ib_port *,
1866			 struct port_attribute *,
1867			 const char *buf, size_t count);
1868};
1869
1870struct port_counter_attribute {
1871	struct port_attribute	attr;
1872	size_t			offset;
1873};
1874
1875static ssize_t port_attr_show(struct kobject *kobj,
1876			      struct attribute *attr, char *buf)
1877{
1878	struct port_attribute *port_attr =
1879		container_of(attr, struct port_attribute, attr);
1880	struct mlx5_ib_port_sysfs_group *p =
1881		container_of(kobj, struct mlx5_ib_port_sysfs_group,
1882			     kobj);
1883	struct mlx5_ib_port *mibport = container_of(p, struct mlx5_ib_port,
1884						    group);
1885
1886	if (!port_attr->show)
1887		return -EIO;
1888
1889	return port_attr->show(mibport, port_attr, buf);
1890}
1891
1892static ssize_t show_port_counter(struct mlx5_ib_port *p,
1893				 struct port_attribute *port_attr,
1894				 char *buf)
1895{
1896	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
1897	struct port_counter_attribute *counter_attr =
1898		container_of(port_attr, struct port_counter_attribute, attr);
1899	void *out;
1900	int ret;
1901
1902	out = mlx5_vzalloc(outlen);
1903	if (!out)
1904		return -ENOMEM;
1905
1906	ret = mlx5_vport_query_q_counter(p->dev->mdev,
1907					 p->q_cnt_id, 0,
1908					 out, outlen);
1909	if (ret)
1910		goto free;
1911
1912	ret = sprintf(buf, "%d\n",
1913		      be32_to_cpu(*(__be32 *)(out + counter_attr->offset)));
1914
1915free:
	kvfree(out);
1917	return ret;
1918}
1919
#define PORT_COUNTER_ATTR(_name)					\
struct port_counter_attribute port_counter_attr_##_name = {		\
	.attr  = __ATTR(_name, S_IRUGO, show_port_counter, NULL),	\
	.offset = MLX5_BYTE_OFF(query_q_counter_out, _name)		\
}

static PORT_COUNTER_ATTR(rx_write_requests);
static PORT_COUNTER_ATTR(rx_read_requests);
static PORT_COUNTER_ATTR(rx_atomic_requests);
static PORT_COUNTER_ATTR(rx_dct_connect);
static PORT_COUNTER_ATTR(out_of_buffer);
static PORT_COUNTER_ATTR(out_of_sequence);
static PORT_COUNTER_ATTR(duplicate_request);
static PORT_COUNTER_ATTR(rnr_nak_retry_err);
static PORT_COUNTER_ATTR(packet_seq_err);
static PORT_COUNTER_ATTR(implied_nak_seq_err);
static PORT_COUNTER_ATTR(local_ack_timeout_err);

static struct attribute *counter_attrs[] = {
	&port_counter_attr_rx_write_requests.attr.attr,
	&port_counter_attr_rx_read_requests.attr.attr,
	&port_counter_attr_rx_atomic_requests.attr.attr,
	&port_counter_attr_rx_dct_connect.attr.attr,
	&port_counter_attr_out_of_buffer.attr.attr,
	&port_counter_attr_out_of_sequence.attr.attr,
	&port_counter_attr_duplicate_request.attr.attr,
	&port_counter_attr_rnr_nak_retry_err.attr.attr,
	&port_counter_attr_packet_seq_err.attr.attr,
	&port_counter_attr_implied_nak_seq_err.attr.attr,
	&port_counter_attr_local_ack_timeout_err.attr.attr,
	NULL
};

static struct attribute_group port_counters_group = {
	.name  = "counters",
	.attrs  = counter_attrs
};

static const struct sysfs_ops port_sysfs_ops = {
	.show = port_attr_show
};

static struct kobj_type port_type = {
	.sysfs_ops     = &port_sysfs_ops,
};

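/*
 * Create the per-port kobject (named after the 1-based port number) and,
 * when the firmware exposes the relevant counters, attach the "counters"
 * attribute group to it.
 */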
static int add_port_attrs(struct mlx5_ib_dev *dev,
			  struct kobject *parent,
			  struct mlx5_ib_port_sysfs_group *port,
			  u8 port_num)
{
	int ret;

	ret = kobject_init_and_add(&port->kobj, &port_type,
				   parent,
				   "%d", port_num);
	if (ret)
		return ret;

	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
	    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
		ret = sysfs_create_group(&port->kobj, &port_counters_group);
		if (ret)
			goto put_kobj;
	}

	port->enabled = true;
	return ret;

put_kobj:
	kobject_put(&port->kobj);
	return ret;
}

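/*
 * Tear down the per-port sysfs objects created by create_port_attrs(),
 * then drop the "mlx5_ports" parent kobject.
 */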
static void destroy_ports_attrs(struct mlx5_ib_dev *dev,
				unsigned int num_ports)
{
	unsigned int i;

	for (i = 0; i < num_ports; i++) {
		struct mlx5_ib_port_sysfs_group *port =
			&dev->port[i].group;

		if (!port->enabled)
			continue;

		if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
		    MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
			sysfs_remove_group(&port->kobj,
					   &port_counters_group);
		kobject_put(&port->kobj);
		port->enabled = false;
	}

	if (dev->ports_parent) {
		kobject_put(dev->ports_parent);
		dev->ports_parent = NULL;
	}
}

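/*
 * Build the sysfs hierarchy mlx5_ports/<port>/counters/... under the IB
 * device for every port; on error, ports added so far are destroyed again.
 */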
static int create_port_attrs(struct mlx5_ib_dev *dev)
{
	int ret = 0;
	unsigned int i = 0;
	struct device *device = &dev->ib_dev.dev;

	dev->ports_parent = kobject_create_and_add("mlx5_ports",
						   &device->kobj);
	if (!dev->ports_parent)
		return -ENOMEM;

	for (i = 0; i < dev->num_ports; i++) {
		ret = add_port_attrs(dev,
				     dev->ports_parent,
				     &dev->port[i].group,
				     i + 1);

		if (ret)
			goto _destroy_ports_attrs;
	}

	return 0;

_destroy_ports_attrs:
	destroy_ports_attrs(dev, i);
	return ret;
}

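/*
 * mlx5_core attach callback: allocate and populate the ib_device, enable
 * RoCE when port 1 is Ethernet, register the verbs entry points and device
 * resources, and expose the sysfs counters.  The error labels unwind the
 * setup steps in reverse order.
 */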
static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
{
	struct mlx5_ib_dev *dev;
	int err;
	int i;

	printk_once(KERN_INFO "%s", mlx5_version);

	dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
	if (!dev)
		return NULL;

	dev->mdev = mdev;

	dev->port = kcalloc(MLX5_CAP_GEN(mdev, num_ports), sizeof(*dev->port),
			     GFP_KERNEL);
	if (!dev->port)
		goto err_dealloc;

	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].dev = dev;
		dev->port[i].port_num = i;
		dev->port[i].port_gone = 0;
		memset(dev->port[i].gid_table, 0, sizeof(dev->port[i].gid_table));
	}

	err = get_port_caps(dev);
	if (err)
		goto err_free_port;

	if (mlx5_use_mad_ifc(dev))
		get_ext_port_caps(dev);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET) {
		if (MLX5_CAP_GEN(mdev, roce)) {
			err = mlx5_nic_vport_enable_roce(mdev);
			if (err)
				goto err_free_port;
		} else {
			goto err_free_port;
		}
	}

	MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock);

	strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX);
	dev->ib_dev.owner		= THIS_MODULE;
	dev->ib_dev.node_type		= RDMA_NODE_IB_CA;
	dev->ib_dev.local_dma_lkey	= mdev->special_contexts.resd_lkey;
	dev->num_ports		= MLX5_CAP_GEN(mdev, num_ports);
	dev->ib_dev.phys_port_cnt     = dev->num_ports;
	dev->ib_dev.num_comp_vectors    =
		dev->mdev->priv.eq_table.num_comp_vectors;
	dev->ib_dev.dma_device	= &mdev->pdev->dev;

	dev->ib_dev.uverbs_abi_ver	= MLX5_IB_UVERBS_ABI_VERSION;
	dev->ib_dev.uverbs_cmd_mask	=
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
		(1ull << IB_USER_VERBS_CMD_OPEN_QP);

	dev->ib_dev.query_device	= mlx5_ib_query_device;
	dev->ib_dev.query_port		= mlx5_ib_query_port;
	dev->ib_dev.get_link_layer	= mlx5_ib_port_link_layer;
	dev->ib_dev.query_gid		= mlx5_ib_query_gid;
	dev->ib_dev.query_pkey		= mlx5_ib_query_pkey;
	dev->ib_dev.modify_device	= mlx5_ib_modify_device;
	dev->ib_dev.modify_port		= mlx5_ib_modify_port;
	dev->ib_dev.alloc_ucontext	= mlx5_ib_alloc_ucontext;
	dev->ib_dev.dealloc_ucontext	= mlx5_ib_dealloc_ucontext;
	dev->ib_dev.mmap		= mlx5_ib_mmap;
	dev->ib_dev.alloc_pd		= mlx5_ib_alloc_pd;
	dev->ib_dev.dealloc_pd		= mlx5_ib_dealloc_pd;
	dev->ib_dev.create_ah		= mlx5_ib_create_ah;
	dev->ib_dev.query_ah		= mlx5_ib_query_ah;
	dev->ib_dev.destroy_ah		= mlx5_ib_destroy_ah;
	dev->ib_dev.create_srq		= mlx5_ib_create_srq;
	dev->ib_dev.modify_srq		= mlx5_ib_modify_srq;
	dev->ib_dev.query_srq		= mlx5_ib_query_srq;
	dev->ib_dev.destroy_srq		= mlx5_ib_destroy_srq;
	dev->ib_dev.post_srq_recv	= mlx5_ib_post_srq_recv;
	dev->ib_dev.create_qp		= mlx5_ib_create_qp;
	dev->ib_dev.modify_qp		= mlx5_ib_modify_qp;
	dev->ib_dev.query_qp		= mlx5_ib_query_qp;
	dev->ib_dev.destroy_qp		= mlx5_ib_destroy_qp;
	dev->ib_dev.post_send		= mlx5_ib_post_send;
	dev->ib_dev.post_recv		= mlx5_ib_post_recv;
	dev->ib_dev.create_cq		= mlx5_ib_create_cq;
	dev->ib_dev.modify_cq		= mlx5_ib_modify_cq;
	dev->ib_dev.resize_cq		= mlx5_ib_resize_cq;
	dev->ib_dev.destroy_cq		= mlx5_ib_destroy_cq;
	dev->ib_dev.poll_cq		= mlx5_ib_poll_cq;
	dev->ib_dev.req_notify_cq	= mlx5_ib_arm_cq;
	dev->ib_dev.get_dma_mr		= mlx5_ib_get_dma_mr;
	dev->ib_dev.reg_user_mr		= mlx5_ib_reg_user_mr;
	dev->ib_dev.reg_phys_mr		= mlx5_ib_reg_phys_mr;
	dev->ib_dev.dereg_mr		= mlx5_ib_dereg_mr;
	dev->ib_dev.attach_mcast	= mlx5_ib_mcg_attach;
	dev->ib_dev.detach_mcast	= mlx5_ib_mcg_detach;
	dev->ib_dev.process_mad		= mlx5_ib_process_mad;
	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
	dev->ib_dev.alloc_fast_reg_mr	= mlx5_ib_alloc_fast_reg_mr;
	dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;

	if (MLX5_CAP_GEN(mdev, xrc)) {
		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
		dev->ib_dev.uverbs_cmd_mask |=
			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
	}

	err = init_node_data(dev);
	if (err)
		goto err_disable_roce;

	mutex_init(&dev->cap_mask_mutex);
	INIT_LIST_HEAD(&dev->qp_list);
	spin_lock_init(&dev->reset_flow_resource_lock);

	err = create_dev_resources(&dev->devr);
	if (err)
		goto err_disable_roce;

	err = mlx5_ib_alloc_q_counters(dev);
	if (err)
		goto err_odp;

	err = ib_register_device(&dev->ib_dev, NULL);
	if (err)
		goto err_q_cnt;

	err = create_umr_res(dev);
	if (err)
		goto err_dev;

	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB) {
		if (init_dc_improvements(dev))
			mlx5_ib_dbg(dev, "init_dc_improvements - continuing\n");
	}

	err = create_port_attrs(dev);
	if (err)
		goto err_dc;

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		err = device_create_file(&dev->ib_dev.dev,
					 mlx5_class_attributes[i]);
		if (err)
			goto err_port_attrs;
	}

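	/*
	 * Start one kernel thread per port running mlx5_ib_roce_port_update();
	 * mlx5_ib_remove() later sets port_gone and waits for each thread to
	 * acknowledge before tearing the device down.
	 */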
	if (1) {
		struct thread *rl_thread = NULL;
		struct proc *rl_proc = NULL;

		for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
			(void) kproc_kthread_add(mlx5_ib_roce_port_update, dev->port + i, &rl_proc, &rl_thread,
			    RFHIGHPID, 0, "mlx5-ib-roce-port", "mlx5-ib-roce_port-%d", i);
		}
	}

	dev->ib_active = true;

	return dev;

err_port_attrs:
	destroy_ports_attrs(dev, dev->num_ports);

err_dc:
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	destroy_umrc_res(dev);

err_dev:
	ib_unregister_device(&dev->ib_dev);

err_q_cnt:
	mlx5_ib_dealloc_q_counters(dev);

err_odp:
	destroy_dev_resources(&dev->devr);

err_disable_roce:
	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);
err_free_port:
	kfree(dev->port);

err_dealloc:
	ib_dealloc_device((struct ib_device *)dev);

	return NULL;
}

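/*
 * mlx5_core detach callback: signal the per-port update threads via
 * port_gone and wait for them to acknowledge, then undo everything
 * mlx5_ib_add() set up, in reverse order.
 */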
static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
{
	struct mlx5_ib_dev *dev = context;
	int i;

	for (i = 0; i < MLX5_CAP_GEN(mdev, num_ports); i++) {
		dev->port[i].port_gone = 1;
		while (dev->port[i].port_gone != 2)
			pause("W", hz);
	}

	for (i = 0; i < ARRAY_SIZE(mlx5_class_attributes); i++) {
		device_remove_file(&dev->ib_dev.dev,
		    mlx5_class_attributes[i]);
	}

	destroy_ports_attrs(dev, dev->num_ports);
	if (MLX5_CAP_GEN(dev->mdev, port_type) ==
	    MLX5_CAP_PORT_TYPE_IB)
		cleanup_dc_improvements(dev);
	mlx5_ib_dealloc_q_counters(dev);
	ib_unregister_device(&dev->ib_dev);
	destroy_umrc_res(dev);
	destroy_dev_resources(&dev->devr);

	if (mlx5_ib_port_link_layer(&dev->ib_dev, 1) ==
	    IB_LINK_LAYER_ETHERNET && MLX5_CAP_GEN(mdev, roce))
		mlx5_nic_vport_disable_roce(mdev);

	kfree(dev->port);
	ib_dealloc_device(&dev->ib_dev);
}

static struct mlx5_interface mlx5_ib_interface = {
	.add            = mlx5_ib_add,
	.remove         = mlx5_ib_remove,
	.event          = mlx5_ib_event,
	.protocol	= MLX5_INTERFACE_PROTOCOL_IB,
};

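/*
 * Module load: register with mlx5_core and create the driver-wide
 * workqueue; unregister again if the workqueue cannot be created.
 */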
static int __init mlx5_ib_init(void)
{
	int err;

	if (deprecated_prof_sel != 2)
		printf("mlx5_ib: WARN: ""prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");

	err = mlx5_register_interface(&mlx5_ib_interface);
	if (err)
		return err;

	mlx5_ib_wq = create_singlethread_workqueue("mlx5_ib_wq");
	if (!mlx5_ib_wq) {
		printf("mlx5_ib: ERR: ""%s: failed to create mlx5_ib_wq\n", __func__);
		/* Report the failure instead of silently returning success. */
		err = -ENOMEM;
		goto err_unreg;
	}

	return 0;

err_unreg:
	mlx5_unregister_interface(&mlx5_ib_interface);
	return err;
}

static void __exit mlx5_ib_cleanup(void)
{
	destroy_workqueue(mlx5_ib_wq);
	mlx5_unregister_interface(&mlx5_ib_interface);
}

module_init_order(mlx5_ib_init, SI_ORDER_THIRD);
module_exit_order(mlx5_ib_cleanup, SI_ORDER_THIRD);