mlx4_ib_main.c revision 331794
1/*
2 * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses.  You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 *     Redistribution and use in source and binary forms, with or
12 *     without modification, are permitted provided that the following
13 *     conditions are met:
14 *
15 *      - Redistributions of source code must retain the above
16 *        copyright notice, this list of conditions and the following
17 *        disclaimer.
18 *
19 *      - Redistributions in binary form must reproduce the above
20 *        copyright notice, this list of conditions and the following
21 *        disclaimer in the documentation and/or other materials
22 *        provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34#define	LINUXKPI_PARAM_PREFIX mlx4_
35
36#include <linux/module.h>
37#include <linux/slab.h>
38#include <linux/errno.h>
39#include <linux/etherdevice.h>
40#include <linux/netdevice.h>
41#include <linux/inetdevice.h>
42#include <linux/if_vlan.h>
43#include <linux/fs.h>
44#include <linux/rcupdate.h>
45#include <linux/notifier.h>
46#include <linux/delay.h>
47
48#include <net/ipv6.h>
49
50#include <rdma/ib_smi.h>
51#include <rdma/ib_user_verbs.h>
52#include <rdma/ib_addr.h>
53#include <rdma/ib_cache.h>
54
55#include <dev/mlx4/driver.h>
56#include <dev/mlx4/cmd.h>
57#include <dev/mlx4/qp.h>
58#include <linux/sched.h>
59#include <linux/page.h>
60#include <linux/printk.h>
61#include "mlx4_ib.h"
62#include <rdma/mlx4-abi.h>
63#include "wc.h"
64
65#define DRV_NAME	MLX4_IB_DRV_NAME
66#ifndef DRV_VERSION
67#define DRV_VERSION	"3.4.1"
68#endif
69#define DRV_RELDATE	"February 2018"
70
71#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
72#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
73#define MLX4_IB_CARD_REV_A0   0xA0
74
75MODULE_AUTHOR("Roland Dreier");
76MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
77MODULE_LICENSE("Dual BSD/GPL");
78
79int mlx4_ib_sm_guid_assign = 0;
80module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
81MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
82
83static const char mlx4_ib_version[] =
84	DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
85	DRV_VERSION " (" DRV_RELDATE ")\n";
86
87static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
88
89static struct workqueue_struct *wq;
90
91static void init_query_mad(struct ib_smp *mad)
92{
93	mad->base_version  = 1;
94	mad->mgmt_class    = IB_MGMT_CLASS_SUBN_LID_ROUTED;
95	mad->class_version = 1;
96	mad->method	   = IB_MGMT_METHOD_GET;
97}
98
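/*
 * Device-managed flow steering (DMFS) is only reported as available when
 * the firmware advertises it for every configured port type: FS_EN for
 * Ethernet ports and DMFS_IPOIB for IB ports.  It is never used for IB
 * ports in a multi-function (SR-IOV) configuration.
 */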
99static int check_flow_steering_support(struct mlx4_dev *dev)
100{
101	int eth_num_ports = 0;
102	int ib_num_ports = 0;
103
104	int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
105
106	if (dmfs) {
107		int i;
108		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
109			eth_num_ports++;
110		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
111			ib_num_ports++;
112		dmfs &= (!ib_num_ports ||
113			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
114			(!eth_num_ports ||
115			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
116		if (ib_num_ports && mlx4_is_mfunc(dev)) {
117			pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
118			dmfs = 0;
119		}
120	}
121	return dmfs;
122}
123
124static int num_ib_ports(struct mlx4_dev *dev)
125{
126	int ib_ports = 0;
127	int i;
128
129	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
130		ib_ports++;
131
132	return ib_ports;
133}
134
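/*
 * Return the Ethernet net_device backing an IB port, holding a reference
 * (dev_hold) that the caller must release.  The bonding-aware lookup of
 * the active slave is compiled out here (#if 0 below).
 */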
135static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, u8 port_num)
136{
137	struct mlx4_ib_dev *ibdev = to_mdev(device);
138	struct net_device *dev;
139
140	rcu_read_lock();
141	dev = mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port_num);
142
143#if 0
144	if (dev) {
145		if (mlx4_is_bonded(ibdev->dev)) {
146			struct net_device *upper = NULL;
147
148			upper = netdev_master_upper_dev_get_rcu(dev);
149			if (upper) {
150				struct net_device *active;
151
152				active = bond_option_active_slave_get_rcu(netdev_priv(upper));
153				if (active)
154					dev = active;
155			}
156		}
157	}
158#endif
159	if (dev)
160		dev_hold(dev);
161
162	rcu_read_unlock();
163	return dev;
164}
165
166static int mlx4_ib_update_gids_v1(struct gid_entry *gids,
167				  struct mlx4_ib_dev *ibdev,
168				  u8 port_num)
169{
170	struct mlx4_cmd_mailbox *mailbox;
171	int err;
172	struct mlx4_dev *dev = ibdev->dev;
173	int i;
174	union ib_gid *gid_tbl;
175
176	mailbox = mlx4_alloc_cmd_mailbox(dev);
177	if (IS_ERR(mailbox))
178		return -ENOMEM;
179
180	gid_tbl = mailbox->buf;
181
182	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
183		memcpy(&gid_tbl[i], &gids[i].gid, sizeof(union ib_gid));
184
185	err = mlx4_cmd(dev, mailbox->dma,
186		       MLX4_SET_PORT_GID_TABLE << 8 | port_num,
187		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
188		       MLX4_CMD_WRAPPED);
189	if (mlx4_is_bonded(dev))
190		err += mlx4_cmd(dev, mailbox->dma,
191				MLX4_SET_PORT_GID_TABLE << 8 | 2,
192				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
193				MLX4_CMD_WRAPPED);
194
195	mlx4_free_cmd_mailbox(dev, mailbox);
196	return err;
197}
198
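/*
 * RoCE v1/v2 capable firmware takes an extended GID table entry carrying a
 * version field (2 for RoCE v2).  For a v2 entry the type is set to 1 when
 * the GID is not IPv4-mapped; for an IPv4-mapped GID the first 12 bytes of
 * the entry are cleared so that only the IPv4 address remains.
 */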
199static int mlx4_ib_update_gids_v1_v2(struct gid_entry *gids,
200				     struct mlx4_ib_dev *ibdev,
201				     u8 port_num)
202{
203	struct mlx4_cmd_mailbox *mailbox;
204	int err;
205	struct mlx4_dev *dev = ibdev->dev;
206	int i;
207	struct {
208		union ib_gid	gid;
209		__be32		rsrvd1[2];
210		__be16		rsrvd2;
211		u8		type;
212		u8		version;
213		__be32		rsrvd3;
214	} *gid_tbl;
215
216	mailbox = mlx4_alloc_cmd_mailbox(dev);
217	if (IS_ERR(mailbox))
218		return -ENOMEM;
219
220	gid_tbl = mailbox->buf;
221	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
222		memcpy(&gid_tbl[i].gid, &gids[i].gid, sizeof(union ib_gid));
223		if (gids[i].gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
224			gid_tbl[i].version = 2;
225			if (!ipv6_addr_v4mapped((struct in6_addr *)&gids[i].gid))
226				gid_tbl[i].type = 1;
227			else
228				memset(&gid_tbl[i].gid, 0, 12);
229		}
230	}
231
232	err = mlx4_cmd(dev, mailbox->dma,
233		       MLX4_SET_PORT_ROCE_ADDR << 8 | port_num,
234		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
235		       MLX4_CMD_WRAPPED);
236	if (mlx4_is_bonded(dev))
237		err += mlx4_cmd(dev, mailbox->dma,
238				MLX4_SET_PORT_ROCE_ADDR << 8 | 2,
239				1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
240				MLX4_CMD_WRAPPED);
241
242	mlx4_free_cmd_mailbox(dev, mailbox);
243	return err;
244}
245
246static int mlx4_ib_update_gids(struct gid_entry *gids,
247			       struct mlx4_ib_dev *ibdev,
248			       u8 port_num)
249{
250	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
251		return mlx4_ib_update_gids_v1_v2(gids, ibdev, port_num);
252
253	return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
254}
255
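/*
 * The driver keeps a per-port software shadow of the hardware GID table.
 * add_gid either takes a reference on an existing matching entry or claims
 * the first free slot, then pushes the whole table to the firmware via
 * SET_PORT (mlx4_ib_update_gids).
 */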
256static int mlx4_ib_add_gid(struct ib_device *device,
257			   u8 port_num,
258			   unsigned int index,
259			   const union ib_gid *gid,
260			   const struct ib_gid_attr *attr,
261			   void **context)
262{
263	struct mlx4_ib_dev *ibdev = to_mdev(device);
264	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
265	struct mlx4_port_gid_table   *port_gid_table;
266	int free = -1, found = -1;
267	int ret = 0;
268	int hw_update = 0;
269	int i;
270	struct gid_entry *gids = NULL;
271
272	if (!rdma_cap_roce_gid_table(device, port_num))
273		return -EINVAL;
274
275	if (port_num > MLX4_MAX_PORTS)
276		return -EINVAL;
277
278	if (!context)
279		return -EINVAL;
280
281	port_gid_table = &iboe->gids[port_num - 1];
282	spin_lock_bh(&iboe->lock);
283	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
284		if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
285		    (port_gid_table->gids[i].gid_type == attr->gid_type))  {
286			found = i;
287			break;
288		}
289		if (free < 0 && !memcmp(&port_gid_table->gids[i].gid, &zgid, sizeof(*gid)))
290			free = i; /* HW has space */
291	}
292
293	if (found < 0) {
294		if (free < 0) {
295			ret = -ENOSPC;
296		} else {
297			port_gid_table->gids[free].ctx = kmalloc(sizeof(*port_gid_table->gids[free].ctx), GFP_ATOMIC);
298			if (!port_gid_table->gids[free].ctx) {
299				ret = -ENOMEM;
300			} else {
301				*context = port_gid_table->gids[free].ctx;
302				memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
303				port_gid_table->gids[free].gid_type = attr->gid_type;
304				port_gid_table->gids[free].ctx->real_index = free;
305				port_gid_table->gids[free].ctx->refcount = 1;
306				hw_update = 1;
307			}
308		}
309	} else {
310		struct gid_cache_context *ctx = port_gid_table->gids[found].ctx;
311		*context = ctx;
312		ctx->refcount++;
313	}
314	if (!ret && hw_update) {
315		gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
316		if (!gids) {
317			ret = -ENOMEM;
318		} else {
319			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++) {
320				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
321				gids[i].gid_type = port_gid_table->gids[i].gid_type;
322			}
323		}
324	}
325	spin_unlock_bh(&iboe->lock);
326
327	if (!ret && hw_update) {
328		ret = mlx4_ib_update_gids(gids, ibdev, port_num);
329		kfree(gids);
330	}
331
332	return ret;
333}
334
335static int mlx4_ib_del_gid(struct ib_device *device,
336			   u8 port_num,
337			   unsigned int index,
338			   void **context)
339{
340	struct gid_cache_context *ctx = *context;
341	struct mlx4_ib_dev *ibdev = to_mdev(device);
342	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
343	struct mlx4_port_gid_table   *port_gid_table;
344	int ret = 0;
345	int hw_update = 0;
346	struct gid_entry *gids = NULL;
347
348	if (!rdma_cap_roce_gid_table(device, port_num))
349		return -EINVAL;
350
351	if (port_num > MLX4_MAX_PORTS)
352		return -EINVAL;
353
354	port_gid_table = &iboe->gids[port_num - 1];
355	spin_lock_bh(&iboe->lock);
356	if (ctx) {
357		ctx->refcount--;
358		if (!ctx->refcount) {
359			unsigned int real_index = ctx->real_index;
360
361			memcpy(&port_gid_table->gids[real_index].gid, &zgid, sizeof(zgid));
362			kfree(port_gid_table->gids[real_index].ctx);
363			port_gid_table->gids[real_index].ctx = NULL;
364			hw_update = 1;
365		}
366	}
367	if (!ret && hw_update) {
368		int i;
369
370		gids = kmalloc(sizeof(*gids) * MLX4_MAX_PORT_GIDS, GFP_ATOMIC);
371		if (!gids) {
372			ret = -ENOMEM;
373		} else {
374			for (i = 0; i < MLX4_MAX_PORT_GIDS; i++)
375				memcpy(&gids[i].gid, &port_gid_table->gids[i].gid, sizeof(union ib_gid));
376		}
377	}
378	spin_unlock_bh(&iboe->lock);
379
380	if (!ret && hw_update) {
381		ret = mlx4_ib_update_gids(gids, ibdev, port_num);
382		kfree(gids);
383	}
384	return ret;
385}
386
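/*
 * Translate a GID index from the core GID cache into the slot the hardware
 * actually uses (recorded when the GID was added).  For non-RoCE ports the
 * index is returned unchanged; for bonded devices port 1 is always used.
 */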
387int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
388				    u8 port_num, int index)
389{
390	struct mlx4_ib_iboe *iboe = &ibdev->iboe;
391	struct gid_cache_context *ctx = NULL;
392	union ib_gid gid;
393	struct mlx4_port_gid_table   *port_gid_table;
394	int real_index = -EINVAL;
395	int i;
396	int ret;
397	unsigned long flags;
398	struct ib_gid_attr attr;
399
400	if (port_num > MLX4_MAX_PORTS)
401		return -EINVAL;
402
403	if (mlx4_is_bonded(ibdev->dev))
404		port_num = 1;
405
406	if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
407		return index;
408
409	ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
410	if (ret)
411		return ret;
412
413	if (attr.ndev)
414		dev_put(attr.ndev);
415
416	if (!memcmp(&gid, &zgid, sizeof(gid)))
417		return -EINVAL;
418
419	spin_lock_irqsave(&iboe->lock, flags);
420	port_gid_table = &iboe->gids[port_num - 1];
421
422	for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
423		if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
424		    attr.gid_type == port_gid_table->gids[i].gid_type) {
425			ctx = port_gid_table->gids[i].ctx;
426			break;
427		}
428	if (ctx)
429		real_index = ctx->real_index;
430	spin_unlock_irqrestore(&iboe->lock, flags);
431	return real_index;
432}
433
434static int mlx4_ib_query_device(struct ib_device *ibdev,
435				struct ib_device_attr *props,
436				struct ib_udata *uhw)
437{
438	struct mlx4_ib_dev *dev = to_mdev(ibdev);
439	struct ib_smp *in_mad  = NULL;
440	struct ib_smp *out_mad = NULL;
441	int err = -ENOMEM;
442	int have_ib_ports;
443	struct mlx4_uverbs_ex_query_device cmd;
444	struct mlx4_uverbs_ex_query_device_resp resp = {.comp_mask = 0};
445	struct mlx4_clock_params clock_params;
446
447	if (uhw->inlen) {
448		if (uhw->inlen < sizeof(cmd))
449			return -EINVAL;
450
451		err = ib_copy_from_udata(&cmd, uhw, sizeof(cmd));
452		if (err)
453			return err;
454
455		if (cmd.comp_mask)
456			return -EINVAL;
457
458		if (cmd.reserved)
459			return -EINVAL;
460	}
461
462	resp.response_length = offsetof(typeof(resp), response_length) +
463		sizeof(resp.response_length);
464	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
465	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
466	if (!in_mad || !out_mad)
467		goto out;
468
469	init_query_mad(in_mad);
470	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
471
472	err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS,
473			   1, NULL, NULL, in_mad, out_mad);
474	if (err)
475		goto out;
476
477	memset(props, 0, sizeof *props);
478
479	have_ib_ports = num_ib_ports(dev->dev);
480
481	props->fw_ver = dev->dev->caps.fw_ver;
482	props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
483		IB_DEVICE_PORT_ACTIVE_EVENT		|
484		IB_DEVICE_SYS_IMAGE_GUID		|
485		IB_DEVICE_RC_RNR_NAK_GEN		|
486		IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
487	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_PKEY_CNTR)
488		props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
489	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
490		props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
491	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
492		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
493	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
494		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
495	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
496		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
497	if (dev->dev->caps.max_gso_sz &&
498	    (dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
499	    (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
500		props->device_cap_flags |= IB_DEVICE_UD_TSO;
501	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
502		props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
503	if ((dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_LOCAL_INV) &&
504	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
505	    (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
506		props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
507	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
508		props->device_cap_flags |= IB_DEVICE_XRC;
509	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW)
510		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW;
511	if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
512		if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_WIN_TYPE_2B)
513			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
514		else
515			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
516	}
517	if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
518		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
519
520	props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
521
522	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
523		0xffffff;
524	props->vendor_part_id	   = dev->dev->persist->pdev->device;
525	props->hw_ver		   = be32_to_cpup((__be32 *) (out_mad->data + 32));
526	memcpy(&props->sys_image_guid, out_mad->data +	4, 8);
527
528	props->max_mr_size	   = ~0ull;
529	props->page_size_cap	   = dev->dev->caps.page_size_cap;
530	props->max_qp		   = dev->dev->quotas.qp;
531	props->max_qp_wr	   = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
532	props->max_sge		   = min(dev->dev->caps.max_sq_sg,
533					 dev->dev->caps.max_rq_sg);
534	props->max_sge_rd	   = MLX4_MAX_SGE_RD;
535	props->max_cq		   = dev->dev->quotas.cq;
536	props->max_cqe		   = dev->dev->caps.max_cqes;
537	props->max_mr		   = dev->dev->quotas.mpt;
538	props->max_pd		   = dev->dev->caps.num_pds - dev->dev->caps.reserved_pds;
539	props->max_qp_rd_atom	   = dev->dev->caps.max_qp_dest_rdma;
540	props->max_qp_init_rd_atom = dev->dev->caps.max_qp_init_rdma;
541	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
542	props->max_srq		   = dev->dev->quotas.srq;
543	props->max_srq_wr	   = dev->dev->caps.max_srq_wqes - 1;
544	props->max_srq_sge	   = dev->dev->caps.max_srq_sge;
545	props->max_fast_reg_page_list_len = MLX4_MAX_FAST_REG_PAGES;
546	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
547	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
548		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
549	props->masked_atomic_cap   = props->atomic_cap;
550	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
551	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
552	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
553	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
554					   props->max_mcast_grp;
555	props->max_map_per_fmr = dev->dev->caps.max_fmr_maps;
556	props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL;
557	props->timestamp_mask = 0xFFFFFFFFFFFFULL;
558
559	if (!mlx4_is_slave(dev->dev))
560		err = mlx4_get_internal_clock_params(dev->dev, &clock_params);
561
562	if (uhw->outlen >= resp.response_length + sizeof(resp.hca_core_clock_offset)) {
563		resp.response_length += sizeof(resp.hca_core_clock_offset);
564		if (!err && !mlx4_is_slave(dev->dev)) {
565			resp.comp_mask |= QUERY_DEVICE_RESP_MASK_TIMESTAMP;
566			resp.hca_core_clock_offset = clock_params.offset % PAGE_SIZE;
567		}
568	}
569
570	if (uhw->outlen) {
571		err = ib_copy_to_udata(uhw, &resp, resp.response_length);
572		if (err)
573			goto out;
574	}
575out:
576	kfree(in_mad);
577	kfree(out_mad);
578
579	return err;
580}
581
582static enum rdma_link_layer
583mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
584{
585	struct mlx4_dev *dev = to_mdev(device)->dev;
586
587	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
588		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
589}
590
591static int ib_link_query_port(struct ib_device *ibdev, u8 port,
592			      struct ib_port_attr *props, int netw_view)
593{
594	struct ib_smp *in_mad  = NULL;
595	struct ib_smp *out_mad = NULL;
596	int ext_active_speed;
597	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
598	int err = -ENOMEM;
599
600	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
601	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
602	if (!in_mad || !out_mad)
603		goto out;
604
605	init_query_mad(in_mad);
606	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
607	in_mad->attr_mod = cpu_to_be32(port);
608
609	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
610		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
611
612	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
613				in_mad, out_mad);
614	if (err)
615		goto out;
616
617
618	props->lid		= be16_to_cpup((__be16 *) (out_mad->data + 16));
619	props->lmc		= out_mad->data[34] & 0x7;
620	props->sm_lid		= be16_to_cpup((__be16 *) (out_mad->data + 18));
621	props->sm_sl		= out_mad->data[36] & 0xf;
622	props->state		= out_mad->data[32] & 0xf;
623	props->phys_state	= out_mad->data[33] >> 4;
624	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
625	if (netw_view)
626		props->gid_tbl_len = out_mad->data[50];
627	else
628		props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
629	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
630	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
631	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
632	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
633	props->active_width	= out_mad->data[31] & 0xf;
634	props->active_speed	= out_mad->data[35] >> 4;
635	props->max_mtu		= out_mad->data[41] & 0xf;
636	props->active_mtu	= out_mad->data[36] >> 4;
637	props->subnet_timeout	= out_mad->data[51] & 0x1f;
638	props->max_vl_num	= out_mad->data[37] >> 4;
639	props->init_type_reply	= out_mad->data[41] >> 4;
640
641	/* Check if extended speeds (EDR/FDR/...) are supported */
642	if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
643		ext_active_speed = out_mad->data[62] >> 4;
644
645		switch (ext_active_speed) {
646		case 1:
647			props->active_speed = IB_SPEED_FDR;
648			break;
649		case 2:
650			props->active_speed = IB_SPEED_EDR;
651			break;
652		}
653	}
654
655	/* If reported active speed is QDR, check if it is FDR-10 */
656	if (props->active_speed == IB_SPEED_QDR) {
657		init_query_mad(in_mad);
658		in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
659		in_mad->attr_mod = cpu_to_be32(port);
660
661		err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
662				   NULL, NULL, in_mad, out_mad);
663		if (err)
664			goto out;
665
666		/* Checking LinkSpeedActive for FDR-10 */
667		if (out_mad->data[15] & 0x1)
668			props->active_speed = IB_SPEED_FDR10;
669	}
670
671	/* Avoid wrong speed value returned by FW if the IB link is down. */
672	if (props->state == IB_PORT_DOWN)
673		 props->active_speed = IB_SPEED_SDR;
674
675out:
676	kfree(in_mad);
677	kfree(out_mad);
678	return err;
679}
680
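/* PortInfo PortPhysicalState: 5 is LinkUp, 3 is Disabled. */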
681static u8 state_to_phys_state(enum ib_port_state state)
682{
683	return state == IB_PORT_ACTIVE ? 5 : 3;
684}
685
686static int eth_link_query_port(struct ib_device *ibdev, u8 port,
687			       struct ib_port_attr *props, int netw_view)
688{
689
690	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
691	struct mlx4_ib_iboe *iboe = &mdev->iboe;
692	struct net_device *ndev;
693	enum ib_mtu tmp;
694	struct mlx4_cmd_mailbox *mailbox;
695	int err = 0;
696	int is_bonded = mlx4_is_bonded(mdev->dev);
697
698	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
699	if (IS_ERR(mailbox))
700		return PTR_ERR(mailbox);
701
702	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
703			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
704			   MLX4_CMD_WRAPPED);
705	if (err)
706		goto out;
707
708	props->active_width	=  (((u8 *)mailbox->buf)[5] == 0x40) ?
709						IB_WIDTH_4X : IB_WIDTH_1X;
710	props->active_speed	= IB_SPEED_QDR;
711	props->port_cap_flags	= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
712	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
713	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
714	props->pkey_tbl_len	= 1;
715	props->max_mtu		= IB_MTU_4096;
716	props->max_vl_num	= 2;
717	props->state		= IB_PORT_DOWN;
718	props->phys_state	= state_to_phys_state(props->state);
719	props->active_mtu	= IB_MTU_256;
720	spin_lock_bh(&iboe->lock);
721	ndev = iboe->netdevs[port - 1];
722	if (ndev && is_bonded) {
723#if 0
724		rcu_read_lock(); /* required to get upper dev */
725		ndev = netdev_master_upper_dev_get_rcu(ndev);
726		rcu_read_unlock();
727#endif
728	}
729	if (!ndev)
730		goto out_unlock;
731
732	tmp = iboe_get_mtu(ndev->if_mtu);
733	props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
734
735	props->state		= (netif_running(ndev) && netif_carrier_ok(ndev)) ?
736					IB_PORT_ACTIVE : IB_PORT_DOWN;
737	props->phys_state	= state_to_phys_state(props->state);
738out_unlock:
739	spin_unlock_bh(&iboe->lock);
740out:
741	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
742	return err;
743}
744
745int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
746			 struct ib_port_attr *props, int netw_view)
747{
748	int err;
749
750	memset(props, 0, sizeof *props);
751
752	err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
753		ib_link_query_port(ibdev, port, props, netw_view) :
754				eth_link_query_port(ibdev, port, props, netw_view);
755
756	return err;
757}
758
759static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
760			      struct ib_port_attr *props)
761{
762	/* returns host view */
763	return __mlx4_ib_query_port(ibdev, port, props, 0);
764}
765
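/*
 * Build a GID from the 8-byte subnet prefix returned by the PortInfo MAD
 * and the 8-byte GUID taken from the GUIDInfo MAD.  In a multi-function
 * setup, the host view exposes only index 0; higher indexes return the
 * null GUID.
 */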
766int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
767			union ib_gid *gid, int netw_view)
768{
769	struct ib_smp *in_mad  = NULL;
770	struct ib_smp *out_mad = NULL;
771	int err = -ENOMEM;
772	struct mlx4_ib_dev *dev = to_mdev(ibdev);
773	int clear = 0;
774	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
775
776	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
777	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
778	if (!in_mad || !out_mad)
779		goto out;
780
781	init_query_mad(in_mad);
782	in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
783	in_mad->attr_mod = cpu_to_be32(port);
784
785	if (mlx4_is_mfunc(dev->dev) && netw_view)
786		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
787
788	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
789	if (err)
790		goto out;
791
792	memcpy(gid->raw, out_mad->data + 8, 8);
793
794	if (mlx4_is_mfunc(dev->dev) && !netw_view) {
795		if (index) {
796			/* For any index > 0, return the null guid */
797			err = 0;
798			clear = 1;
799			goto out;
800		}
801	}
802
803	init_query_mad(in_mad);
804	in_mad->attr_id  = IB_SMP_ATTR_GUID_INFO;
805	in_mad->attr_mod = cpu_to_be32(index / 8);
806
807	err = mlx4_MAD_IFC(dev, mad_ifc_flags, port,
808			   NULL, NULL, in_mad, out_mad);
809	if (err)
810		goto out;
811
812	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
813
814out:
815	if (clear)
816		memset(gid->raw + 8, 0, 8);
817	kfree(in_mad);
818	kfree(out_mad);
819	return err;
820}
821
822static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
823			     union ib_gid *gid)
824{
825	int ret;
826
827	if (rdma_protocol_ib(ibdev, port))
828		return __mlx4_ib_query_gid(ibdev, port, index, gid, 0);
829
830	if (!rdma_protocol_roce(ibdev, port))
831		return -ENODEV;
832
833	if (!rdma_cap_roce_gid_table(ibdev, port))
834		return -ENODEV;
835
836	ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
837	if (ret == -EAGAIN) {
838		memcpy(gid, &zgid, sizeof(*gid));
839		return 0;
840	}
841
842	return ret;
843}
844
845static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl)
846{
847	union sl2vl_tbl_to_u64 sl2vl64;
848	struct ib_smp *in_mad  = NULL;
849	struct ib_smp *out_mad = NULL;
850	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
851	int err = -ENOMEM;
852	int jj;
853
854	if (mlx4_is_slave(to_mdev(ibdev)->dev)) {
855		*sl2vl_tbl = 0;
856		return 0;
857	}
858
859	in_mad  = kzalloc(sizeof(*in_mad), GFP_KERNEL);
860	out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL);
861	if (!in_mad || !out_mad)
862		goto out;
863
864	init_query_mad(in_mad);
865	in_mad->attr_id  = IB_SMP_ATTR_SL_TO_VL_TABLE;
866	in_mad->attr_mod = 0;
867
868	if (mlx4_is_mfunc(to_mdev(ibdev)->dev))
869		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
870
871	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
872			   in_mad, out_mad);
873	if (err)
874		goto out;
875
876	for (jj = 0; jj < 8; jj++)
877		sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj];
878	*sl2vl_tbl = sl2vl64.sl64;
879
880out:
881	kfree(in_mad);
882	kfree(out_mad);
883	return err;
884}
885
886static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev)
887{
888	u64 sl2vl;
889	int i;
890	int err;
891
892	for (i = 1; i <= mdev->dev->caps.num_ports; i++) {
893		if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH)
894			continue;
895		err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl);
896		if (err) {
897			pr_err("Unable to get default sl to vl mapping for port %d.  Using all zeroes (%d)\n",
898			       i, err);
899			sl2vl = 0;
900		}
901		atomic64_set(&mdev->sl2vl[i - 1], sl2vl);
902	}
903}
904
905int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
906			 u16 *pkey, int netw_view)
907{
908	struct ib_smp *in_mad  = NULL;
909	struct ib_smp *out_mad = NULL;
910	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
911	int err = -ENOMEM;
912
913	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
914	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
915	if (!in_mad || !out_mad)
916		goto out;
917
918	init_query_mad(in_mad);
919	in_mad->attr_id  = IB_SMP_ATTR_PKEY_TABLE;
920	in_mad->attr_mod = cpu_to_be32(index / 32);
921
922	if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
923		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
924
925	err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
926			   in_mad, out_mad);
927	if (err)
928		goto out;
929
930	*pkey = be16_to_cpu(((__be16 *) out_mad->data)[index % 32]);
931
932out:
933	kfree(in_mad);
934	kfree(out_mad);
935	return err;
936}
937
938static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
939{
940	return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
941}
942
943static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
944				 struct ib_device_modify *props)
945{
946	struct mlx4_cmd_mailbox *mailbox;
947	unsigned long flags;
948
949	if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
950		return -EOPNOTSUPP;
951
952	if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
953		return 0;
954
955	if (mlx4_is_slave(to_mdev(ibdev)->dev))
956		return -EOPNOTSUPP;
957
958	spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
959	memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
960	spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags);
961
962	/*
963	 * If possible, pass the node desc to the FW so it can generate
964	 * a Trap 144 (local changes) notification.  If the cmd fails, just ignore it.
965	 */
966	mailbox = mlx4_alloc_cmd_mailbox(to_mdev(ibdev)->dev);
967	if (IS_ERR(mailbox))
968		return 0;
969
970	memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
971	mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
972		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
973
974	mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
975
976	return 0;
977}
978
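/*
 * Update the IB port capability mask (and optionally reset the QKey
 * violation counter) via SET_PORT.  Older firmware (MLX4_FLAG_OLD_PORT_CMDS)
 * uses a different mailbox layout, hence the two encodings below.
 */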
979static int mlx4_ib_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
980			    u32 cap_mask)
981{
982	struct mlx4_cmd_mailbox *mailbox;
983	int err;
984
985	mailbox = mlx4_alloc_cmd_mailbox(dev->dev);
986	if (IS_ERR(mailbox))
987		return PTR_ERR(mailbox);
988
989	if (dev->dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
990		*(u8 *) mailbox->buf	     = !!reset_qkey_viols << 6;
991		((__be32 *) mailbox->buf)[2] = cpu_to_be32(cap_mask);
992	} else {
993		((u8 *) mailbox->buf)[3]     = !!reset_qkey_viols;
994		((__be32 *) mailbox->buf)[1] = cpu_to_be32(cap_mask);
995	}
996
997	err = mlx4_cmd(dev->dev, mailbox->dma, port, MLX4_SET_PORT_IB_OPCODE,
998		       MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
999		       MLX4_CMD_WRAPPED);
1000
1001	mlx4_free_cmd_mailbox(dev->dev, mailbox);
1002	return err;
1003}
1004
1005static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
1006			       struct ib_port_modify *props)
1007{
1008	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
1009	u8 is_eth = mdev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
1010	struct ib_port_attr attr;
1011	u32 cap_mask;
1012	int err;
1013
1014	/* return OK if this is RoCE. CM calls ib_modify_port() regardless
1015	 * of whether port link layer is ETH or IB. For ETH ports, qkey
1016	 * violations and port capabilities are not meaningful.
1017	 */
1018	if (is_eth)
1019		return 0;
1020
1021	mutex_lock(&mdev->cap_mask_mutex);
1022
1023	err = mlx4_ib_query_port(ibdev, port, &attr);
1024	if (err)
1025		goto out;
1026
1027	cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
1028		~props->clr_port_cap_mask;
1029
1030	err = mlx4_ib_SET_PORT(mdev, port,
1031			       !!(mask & IB_PORT_RESET_QKEY_CNTR),
1032			       cap_mask);
1033
1034out:
1035	mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex);
1036	return err;
1037}
1038
1039static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
1040						  struct ib_udata *udata)
1041{
1042	struct mlx4_ib_dev *dev = to_mdev(ibdev);
1043	struct mlx4_ib_ucontext *context;
1044	struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
1045	struct mlx4_ib_alloc_ucontext_resp resp;
1046	int err;
1047
1048	if (!dev->ib_active)
1049		return ERR_PTR(-EAGAIN);
1050
1051	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) {
1052		resp_v3.qp_tab_size      = dev->dev->caps.num_qps;
1053		resp_v3.bf_reg_size      = dev->dev->caps.bf_reg_size;
1054		resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1055	} else {
1056		resp.dev_caps	      = dev->dev->caps.userspace_caps;
1057		resp.qp_tab_size      = dev->dev->caps.num_qps;
1058		resp.bf_reg_size      = dev->dev->caps.bf_reg_size;
1059		resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
1060		resp.cqe_size	      = dev->dev->caps.cqe_size;
1061	}
1062
1063	context = kzalloc(sizeof(*context), GFP_KERNEL);
1064	if (!context)
1065		return ERR_PTR(-ENOMEM);
1066
1067	err = mlx4_uar_alloc(to_mdev(ibdev)->dev, &context->uar);
1068	if (err) {
1069		kfree(context);
1070		return ERR_PTR(err);
1071	}
1072
1073	INIT_LIST_HEAD(&context->db_page_list);
1074	mutex_init(&context->db_page_mutex);
1075
1076	if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION)
1077		err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
1078	else
1079		err = ib_copy_to_udata(udata, &resp, sizeof(resp));
1080
1081	if (err) {
1082		mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
1083		kfree(context);
1084		return ERR_PTR(-EFAULT);
1085	}
1086
1087	return &context->ibucontext;
1088}
1089
1090static int mlx4_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1091{
1092	struct mlx4_ib_ucontext *context = to_mucontext(ibcontext);
1093
1094	mlx4_uar_free(to_mdev(ibcontext->device)->dev, &context->uar);
1095	kfree(context);
1096
1097	return 0;
1098}
1099
1100static void  mlx4_ib_vma_open(struct vm_area_struct *area)
1101{
1102	/* vma_open is called when a new VMA is created on top of our VMA.
1103	 * This is done through either mremap flow or split_vma (usually due
1104	 * to mlock, madvise, munmap, etc.). We do not support a clone of the
1105	 * vma, as this VMA is strongly hardware related. Therefore we set the
1106	 * vm_ops of the newly created/cloned VMA to NULL, to prevent it from
1107	 * calling us again and trying to do incorrect actions. We assume that
1108	 * the original vma size is exactly a single page that there will be no
1109	 * the original vma size is exactly a single page, so there will be no
1110	 */
1111	area->vm_ops = NULL;
1112}
1113
1114static void  mlx4_ib_vma_close(struct vm_area_struct *area)
1115{
1116	struct mlx4_ib_vma_private_data *mlx4_ib_vma_priv_data;
1117
1118	/* It's guaranteed that all VMAs opened on a FD are closed before the
1119	 * file itself is closed, therefore no sync is needed with the regular
1120	 * closing flow (e.g. mlx4_ib_dealloc_ucontext).  However, a sync is needed
1121	 * with accesses to the vma made by mlx4_ib_disassociate_ucontext.
1122	 * The close operation is usually called under mm->mmap_sem, except when
1123	 * the process is exiting.  The exiting case is handled explicitly as part
1124	 * of mlx4_ib_disassociate_ucontext.
1125	 */
1126	mlx4_ib_vma_priv_data = (struct mlx4_ib_vma_private_data *)
1127				area->vm_private_data;
1128
1129	/* Set the vma context pointer to NULL in the mlx4_ib driver's private
1130	 * data to protect against a race condition in mlx4_ib_disassociate_ucontext().
1131	 */
1132	mlx4_ib_vma_priv_data->vma = NULL;
1133}
1134
1135static const struct vm_operations_struct mlx4_ib_vm_ops = {
1136	.open = mlx4_ib_vma_open,
1137	.close = mlx4_ib_vma_close
1138};
1139
1140static void mlx4_ib_set_vma_data(struct vm_area_struct *vma,
1141				 struct mlx4_ib_vma_private_data *vma_private_data)
1142{
1143	vma_private_data->vma = vma;
1144	vma->vm_private_data = vma_private_data;
1145	vma->vm_ops =  &mlx4_ib_vm_ops;
1146}
1147
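/*
 * Userspace mappings are selected by page offset: offset 0 maps the UAR
 * doorbell page (non-cached), offset 1 maps the BlueFlame register page
 * (write-combining, only if BlueFlame is supported), and offset 3 maps the
 * HCA internal clock page for timestamp reads.
 */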
1148static int mlx4_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
1149{
1150	struct mlx4_ib_dev *dev = to_mdev(context->device);
1151	struct mlx4_ib_ucontext *mucontext = to_mucontext(context);
1152
1153	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
1154		return -EINVAL;
1155
1156	if (vma->vm_pgoff == 0) {
1157		/* We prevent double mmapping on the same context */
1158		if (mucontext->hw_bar_info[HW_BAR_DB].vma)
1159			return -EINVAL;
1160
1161		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1162
1163		if (io_remap_pfn_range(vma, vma->vm_start,
1164				       to_mucontext(context)->uar.pfn,
1165				       PAGE_SIZE, vma->vm_page_prot))
1166			return -EAGAIN;
1167
1168		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_DB]);
1169
1170	} else if (vma->vm_pgoff == 1 && dev->dev->caps.bf_reg_size != 0) {
1171		/* We prevent double mmapping on the same context */
1172		if (mucontext->hw_bar_info[HW_BAR_BF].vma)
1173			return -EINVAL;
1174
1175		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
1176
1177		if (io_remap_pfn_range(vma, vma->vm_start,
1178				       to_mucontext(context)->uar.pfn +
1179				       dev->dev->caps.num_uars,
1180				       PAGE_SIZE, vma->vm_page_prot))
1181			return -EAGAIN;
1182
1183		mlx4_ib_set_vma_data(vma, &mucontext->hw_bar_info[HW_BAR_BF]);
1184
1185	} else if (vma->vm_pgoff == 3) {
1186		struct mlx4_clock_params params;
1187		int ret;
1188
1189		/* We prevent double mmapping on the same context */
1190		if (mucontext->hw_bar_info[HW_BAR_CLOCK].vma)
1191			return -EINVAL;
1192
1193		ret = mlx4_get_internal_clock_params(dev->dev, &params);
1194
1195		if (ret)
1196			return ret;
1197
1198		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
1199		if (io_remap_pfn_range(vma, vma->vm_start,
1200				       (pci_resource_start(dev->dev->persist->pdev,
1201							   params.bar) +
1202					params.offset)
1203				       >> PAGE_SHIFT,
1204				       PAGE_SIZE, vma->vm_page_prot))
1205			return -EAGAIN;
1206
1207		mlx4_ib_set_vma_data(vma,
1208				     &mucontext->hw_bar_info[HW_BAR_CLOCK]);
1209	} else {
1210		return -EINVAL;
1211	}
1212
1213	return 0;
1214}
1215
1216static struct ib_pd *mlx4_ib_alloc_pd(struct ib_device *ibdev,
1217				      struct ib_ucontext *context,
1218				      struct ib_udata *udata)
1219{
1220	struct mlx4_ib_pd *pd;
1221	int err;
1222
1223	pd = kmalloc(sizeof *pd, GFP_KERNEL);
1224	if (!pd)
1225		return ERR_PTR(-ENOMEM);
1226
1227	err = mlx4_pd_alloc(to_mdev(ibdev)->dev, &pd->pdn);
1228	if (err) {
1229		kfree(pd);
1230		return ERR_PTR(err);
1231	}
1232
1233	if (context)
1234		if (ib_copy_to_udata(udata, &pd->pdn, sizeof (__u32))) {
1235			mlx4_pd_free(to_mdev(ibdev)->dev, pd->pdn);
1236			kfree(pd);
1237			return ERR_PTR(-EFAULT);
1238		}
1239
1240	return &pd->ibpd;
1241}
1242
1243static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
1244{
1245	mlx4_pd_free(to_mdev(pd->device)->dev, to_mpd(pd)->pdn);
1246	kfree(pd);
1247
1248	return 0;
1249}
1250
1251static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
1252					  struct ib_ucontext *context,
1253					  struct ib_udata *udata)
1254{
1255	struct mlx4_ib_xrcd *xrcd;
1256	struct ib_cq_init_attr cq_attr = {};
1257	int err;
1258
1259	if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1260		return ERR_PTR(-ENOSYS);
1261
1262	xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
1263	if (!xrcd)
1264		return ERR_PTR(-ENOMEM);
1265
1266	err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
1267	if (err)
1268		goto err1;
1269
1270	xrcd->pd = ib_alloc_pd(ibdev, 0);
1271	if (IS_ERR(xrcd->pd)) {
1272		err = PTR_ERR(xrcd->pd);
1273		goto err2;
1274	}
1275
1276	cq_attr.cqe = 1;
1277	xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, &cq_attr);
1278	if (IS_ERR(xrcd->cq)) {
1279		err = PTR_ERR(xrcd->cq);
1280		goto err3;
1281	}
1282
1283	return &xrcd->ibxrcd;
1284
1285err3:
1286	ib_dealloc_pd(xrcd->pd);
1287err2:
1288	mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
1289err1:
1290	kfree(xrcd);
1291	return ERR_PTR(err);
1292}
1293
1294static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
1295{
1296	ib_destroy_cq(to_mxrcd(xrcd)->cq);
1297	ib_dealloc_pd(to_mxrcd(xrcd)->pd);
1298	mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
1299	kfree(xrcd);
1300
1301	return 0;
1302}
1303
1304static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
1305{
1306	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1307	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1308	struct mlx4_ib_gid_entry *ge;
1309
1310	ge = kzalloc(sizeof *ge, GFP_KERNEL);
1311	if (!ge)
1312		return -ENOMEM;
1313
1314	ge->gid = *gid;
1315	if (mlx4_ib_add_mc(mdev, mqp, gid)) {
1316		ge->port = mqp->port;
1317		ge->added = 1;
1318	}
1319
1320	mutex_lock(&mqp->mutex);
1321	list_add_tail(&ge->list, &mqp->gid_list);
1322	mutex_unlock(&mqp->mutex);
1323
1324	return 0;
1325}
1326
1327static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
1328					  struct mlx4_ib_counters *ctr_table)
1329{
1330	struct counter_index *counter, *tmp_count;
1331
1332	mutex_lock(&ctr_table->mutex);
1333	list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
1334				 list) {
1335		if (counter->allocated)
1336			mlx4_counter_free(ibdev->dev, counter->index);
1337		list_del(&counter->list);
1338		kfree(counter);
1339	}
1340	mutex_unlock(&ctr_table->mutex);
1341}
1342
1343int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
1344		   union ib_gid *gid)
1345{
1346	struct net_device *ndev;
1347	int ret = 0;
1348
1349	if (!mqp->port)
1350		return 0;
1351
1352	spin_lock_bh(&mdev->iboe.lock);
1353	ndev = mdev->iboe.netdevs[mqp->port - 1];
1354	if (ndev)
1355		dev_hold(ndev);
1356	spin_unlock_bh(&mdev->iboe.lock);
1357
1358	if (ndev) {
1359		ret = 1;
1360		dev_put(ndev);
1361	}
1362
1363	return ret;
1364}
1365
1366struct mlx4_ib_steering {
1367	struct list_head list;
1368	struct mlx4_flow_reg_id reg_id;
1369	union ib_gid gid;
1370};
1371
1372#define LAST_ETH_FIELD vlan_tag
1373#define LAST_IB_FIELD sl
1374#define LAST_IPV4_FIELD dst_ip
1375#define LAST_TCP_UDP_FIELD src_port
1376
1377/* Field is the last supported field */
1378#define FIELDS_NOT_SUPPORTED(filter, field)\
1379	memchr_inv((void *)&filter.field  +\
1380		   sizeof(filter.field), 0,\
1381		   sizeof(filter) -\
1382		   offsetof(typeof(filter), field) -\
1383		   sizeof(filter.field))
1384
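/*
 * parse_flow_attr() translates a single ib_flow_spec into the hardware rule
 * format and returns its size in bytes.  FIELDS_NOT_SUPPORTED() above rejects
 * a spec whose mask sets any byte beyond the last field the driver can
 * offload for that layer.
 */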
1385static int parse_flow_attr(struct mlx4_dev *dev,
1386			   u32 qp_num,
1387			   union ib_flow_spec *ib_spec,
1388			   struct _rule_hw *mlx4_spec)
1389{
1390	enum mlx4_net_trans_rule_id type;
1391
1392	switch (ib_spec->type) {
1393	case IB_FLOW_SPEC_ETH:
1394		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
1395			return -ENOTSUPP;
1396
1397		type = MLX4_NET_TRANS_RULE_ID_ETH;
1398		memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac,
1399		       ETH_ALEN);
1400		memcpy(mlx4_spec->eth.dst_mac_msk, ib_spec->eth.mask.dst_mac,
1401		       ETH_ALEN);
1402		mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
1403		mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
1404		break;
1405	case IB_FLOW_SPEC_IB:
1406		if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD))
1407			return -ENOTSUPP;
1408
1409		type = MLX4_NET_TRANS_RULE_ID_IB;
1410		mlx4_spec->ib.l3_qpn =
1411			cpu_to_be32(qp_num);
1412		mlx4_spec->ib.qpn_mask =
1413			cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
1414		break;
1415
1416
1417	case IB_FLOW_SPEC_IPV4:
1418		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
1419			return -ENOTSUPP;
1420
1421		type = MLX4_NET_TRANS_RULE_ID_IPV4;
1422		mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip;
1423		mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip;
1424		mlx4_spec->ipv4.dst_ip = ib_spec->ipv4.val.dst_ip;
1425		mlx4_spec->ipv4.dst_ip_msk = ib_spec->ipv4.mask.dst_ip;
1426		break;
1427
1428	case IB_FLOW_SPEC_TCP:
1429	case IB_FLOW_SPEC_UDP:
1430		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD))
1431			return -ENOTSUPP;
1432
1433		type = ib_spec->type == IB_FLOW_SPEC_TCP ?
1434					MLX4_NET_TRANS_RULE_ID_TCP :
1435					MLX4_NET_TRANS_RULE_ID_UDP;
1436		mlx4_spec->tcp_udp.dst_port = ib_spec->tcp_udp.val.dst_port;
1437		mlx4_spec->tcp_udp.dst_port_msk = ib_spec->tcp_udp.mask.dst_port;
1438		mlx4_spec->tcp_udp.src_port = ib_spec->tcp_udp.val.src_port;
1439		mlx4_spec->tcp_udp.src_port_msk = ib_spec->tcp_udp.mask.src_port;
1440		break;
1441
1442	default:
1443		return -EINVAL;
1444	}
1445	if (mlx4_map_sw_to_hw_steering_id(dev, type) < 0 ||
1446	    mlx4_hw_rule_sz(dev, type) < 0)
1447		return -EINVAL;
1448	mlx4_spec->id = cpu_to_be16(mlx4_map_sw_to_hw_steering_id(dev, type));
1449	mlx4_spec->size = mlx4_hw_rule_sz(dev, type) >> 2;
1450	return mlx4_hw_rule_sz(dev, type);
1451}
1452
1453struct default_rules {
1454	__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1455	__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
1456	__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
1457	__u8  link_layer;
1458};
1459static const struct default_rules default_table[] = {
1460	{
1461		.mandatory_fields = {IB_FLOW_SPEC_IPV4},
1462		.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
1463		.rules_create_list = {IB_FLOW_SPEC_IB},
1464		.link_layer = IB_LINK_LAYER_INFINIBAND
1465	}
1466};
1467
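/*
 * Default rules: on an InfiniBand link layer, a flow built from an IPv4 spec
 * with no accompanying ETH spec gets an implicit (empty) IB spec prepended.
 * __mlx4_ib_default_rules_match() returns the matching table index, or -1.
 */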
1468static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
1469					 struct ib_flow_attr *flow_attr)
1470{
1471	int i, j, k;
1472	void *ib_flow;
1473	const struct default_rules *pdefault_rules = default_table;
1474	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
1475
1476	for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
1477		__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
1478		memset(&field_types, 0, sizeof(field_types));
1479
1480		if (link_layer != pdefault_rules->link_layer)
1481			continue;
1482
1483		ib_flow = flow_attr + 1;
1484		/* we assume the specs are sorted */
1485		for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
1486		     j < flow_attr->num_of_specs; k++) {
1487			union ib_flow_spec *current_flow =
1488				(union ib_flow_spec *)ib_flow;
1489
1490			/* same layer but different type */
1491			if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
1492			     (pdefault_rules->mandatory_fields[k] &
1493			      IB_FLOW_SPEC_LAYER_MASK)) &&
1494			    (current_flow->type !=
1495			     pdefault_rules->mandatory_fields[k]))
1496				goto out;
1497
1498			/* same layer, try match next one */
1499			/* same layer, try to match the next one */
1500			    pdefault_rules->mandatory_fields[k]) {
1501				j++;
1502				ib_flow +=
1503					((union ib_flow_spec *)ib_flow)->size;
1504			}
1505		}
1506
1507		ib_flow = flow_attr + 1;
1508		for (j = 0; j < flow_attr->num_of_specs;
1509		     j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
1510			for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
1511				/* same layer and same type */
1512				if (((union ib_flow_spec *)ib_flow)->type ==
1513				    pdefault_rules->mandatory_not_fields[k])
1514					goto out;
1515
1516		return i;
1517	}
1518out:
1519	return -1;
1520}
1521
1522static int __mlx4_ib_create_default_rules(
1523		struct mlx4_ib_dev *mdev,
1524		struct ib_qp *qp,
1525		const struct default_rules *pdefault_rules,
1526		struct _rule_hw *mlx4_spec) {
1527	int size = 0;
1528	int i;
1529
1530	for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
1531		int ret;
1532		union ib_flow_spec ib_spec;
1533		switch (pdefault_rules->rules_create_list[i]) {
1534		case 0:
1535			/* no rule */
1536			continue;
1537		case IB_FLOW_SPEC_IB:
1538			ib_spec.type = IB_FLOW_SPEC_IB;
1539			ib_spec.size = sizeof(struct ib_flow_spec_ib);
1540
1541			break;
1542		default:
1543			/* invalid rule */
1544			return -EINVAL;
1545		}
1546		/* We must put an empty rule here; the qpn is ignored */
1547		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
1548				      mlx4_spec);
1549		if (ret < 0) {
1550			pr_info("invalid parsing\n");
1551			return -EINVAL;
1552		}
1553
1554		mlx4_spec = (void *)mlx4_spec + ret;
1555		size += ret;
1556	}
1557	return size;
1558}
1559
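/*
 * Build a flow steering rule in a command mailbox: a control header
 * (priority, steering mode, port, qpn) followed by the translated specs,
 * then attach it with MLX4_QP_FLOW_STEERING_ATTACH.  The returned 64-bit
 * registration id is what __mlx4_ib_destroy_flow() uses to detach the rule.
 */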
1560static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1561			  int domain,
1562			  enum mlx4_net_trans_promisc_mode flow_type,
1563			  u64 *reg_id)
1564{
1565	int ret, i;
1566	int size = 0;
1567	void *ib_flow;
1568	struct mlx4_ib_dev *mdev = to_mdev(qp->device);
1569	struct mlx4_cmd_mailbox *mailbox;
1570	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
1571	int default_flow;
1572
1573	static const u16 __mlx4_domain[] = {
1574		[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
1575		[IB_FLOW_DOMAIN_ETHTOOL] = MLX4_DOMAIN_ETHTOOL,
1576		[IB_FLOW_DOMAIN_RFS] = MLX4_DOMAIN_RFS,
1577		[IB_FLOW_DOMAIN_NIC] = MLX4_DOMAIN_NIC,
1578	};
1579
1580	if (flow_attr->priority > MLX4_IB_FLOW_MAX_PRIO) {
1581		pr_err("Invalid priority value %d\n", flow_attr->priority);
1582		return -EINVAL;
1583	}
1584
1585	if (domain >= IB_FLOW_DOMAIN_NUM) {
1586		pr_err("Invalid domain value %d\n", domain);
1587		return -EINVAL;
1588	}
1589
1590	if (mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type) < 0)
1591		return -EINVAL;
1592
1593	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
1594	if (IS_ERR(mailbox))
1595		return PTR_ERR(mailbox);
1596	ctrl = mailbox->buf;
1597
1598	ctrl->prio = cpu_to_be16(__mlx4_domain[domain] |
1599				 flow_attr->priority);
1600	ctrl->type = mlx4_map_sw_to_hw_steering_mode(mdev->dev, flow_type);
1601	ctrl->port = flow_attr->port;
1602	ctrl->qpn = cpu_to_be32(qp->qp_num);
1603
1604	ib_flow = flow_attr + 1;
1605	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
1606	/* Add default flows */
1607	default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
1608	if (default_flow >= 0) {
1609		ret = __mlx4_ib_create_default_rules(
1610				mdev, qp, default_table + default_flow,
1611				mailbox->buf + size);
1612		if (ret < 0) {
1613			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1614			return -EINVAL;
1615		}
1616		size += ret;
1617	}
1618	for (i = 0; i < flow_attr->num_of_specs; i++) {
1619		ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
1620				      mailbox->buf + size);
1621		if (ret < 0) {
1622			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1623			return -EINVAL;
1624		}
1625		ib_flow += ((union ib_flow_spec *) ib_flow)->size;
1626		size += ret;
1627	}
1628
1629	ret = mlx4_cmd_imm(mdev->dev, mailbox->dma, reg_id, size >> 2, 0,
1630			   MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
1631			   MLX4_CMD_WRAPPED);
1632	if (ret == -ENOMEM)
1633		pr_err("mcg table is full. Fail to register network rule.\n");
1634	else if (ret == -ENXIO)
1635		pr_err("Device managed flow steering is disabled. Fail to register network rule.\n");
1636	else if (ret)
1637		pr_err("Invalid argument. Fail to register network rule.\n");
1638
1639	mlx4_free_cmd_mailbox(mdev->dev, mailbox);
1640	return ret;
1641}
1642
1643static int __mlx4_ib_destroy_flow(struct mlx4_dev *dev, u64 reg_id)
1644{
1645	int err;
1646	err = mlx4_cmd(dev, reg_id, 0, 0,
1647		       MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
1648		       MLX4_CMD_WRAPPED);
1649	if (err)
1650		pr_err("Fail to detach network rule. registration id = 0x%llx\n",
1651		       (long long)reg_id);
1652	return err;
1653}
1654
1655static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
1656				    u64 *reg_id)
1657{
1658	void *ib_flow;
1659	union ib_flow_spec *ib_spec;
1660	struct mlx4_dev	*dev = to_mdev(qp->device)->dev;
1661	int err = 0;
1662
1663	if (dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN ||
1664	    dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC)
1665		return 0; /* do nothing */
1666
1667	ib_flow = flow_attr + 1;
1668	ib_spec = (union ib_flow_spec *)ib_flow;
1669
1670	if (ib_spec->type !=  IB_FLOW_SPEC_ETH || flow_attr->num_of_specs != 1)
1671		return 0; /* do nothing */
1672
1673	err = mlx4_tunnel_steer_add(to_mdev(qp->device)->dev, ib_spec->eth.val.dst_mac,
1674				    flow_attr->port, qp->qp_num,
1675				    MLX4_DOMAIN_UVERBS | (flow_attr->priority & 0xff),
1676				    reg_id);
1677	return err;
1678}
1679
1680static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
1681				      struct ib_flow_attr *flow_attr,
1682				      enum mlx4_net_trans_promisc_mode *type)
1683{
1684	int err = 0;
1685
1686	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
1687	    (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
1688	    (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
1689		return -EOPNOTSUPP;
1690	}
1691
1692	if (flow_attr->num_of_specs == 0) {
1693		type[0] = MLX4_FS_MC_SNIFFER;
1694		type[1] = MLX4_FS_UC_SNIFFER;
1695	} else {
1696		union ib_flow_spec *ib_spec;
1697
1698		ib_spec = (union ib_flow_spec *)(flow_attr + 1);
1699		if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
1700			return -EINVAL;
1701
1702		/* if the mask is all zeros, match both MC and UC */
1703		if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
1704			type[0] = MLX4_FS_MC_SNIFFER;
1705			type[1] = MLX4_FS_UC_SNIFFER;
1706		} else {
1707			u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
1708					    ib_spec->eth.mask.dst_mac[1],
1709					    ib_spec->eth.mask.dst_mac[2],
1710					    ib_spec->eth.mask.dst_mac[3],
1711					    ib_spec->eth.mask.dst_mac[4],
1712					    ib_spec->eth.mask.dst_mac[5]};
1713
1714			/* The XOR above touched only the MC bit; a non-empty mask
1715			 * is valid only if that bit is set and the rest are zero.
1716			 */
1717			if (!is_zero_ether_addr(&mac[0]))
1718				return -EINVAL;
1719
1720			if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
1721				type[0] = MLX4_FS_MC_SNIFFER;
1722			else
1723				type[0] = MLX4_FS_UC_SNIFFER;
1724		}
1725	}
1726
1727	return err;
1728}
1729
1730static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
1731				    struct ib_flow_attr *flow_attr,
1732				    int domain)
1733{
1734	int err = 0, i = 0, j = 0;
1735	struct mlx4_ib_flow *mflow;
1736	enum mlx4_net_trans_promisc_mode type[2];
1737	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
1738	int is_bonded = mlx4_is_bonded(dev);
1739
1740	if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
1741		return ERR_PTR(-EINVAL);
1742
1743	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
1744	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
1745		return ERR_PTR(-EOPNOTSUPP);
1746
1747	memset(type, 0, sizeof(type));
1748
1749	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
1750	if (!mflow) {
1751		err = -ENOMEM;
1752		goto err_free;
1753	}
1754
1755	switch (flow_attr->type) {
1756	case IB_FLOW_ATTR_NORMAL:
1757		/* If the don't-trap flag (continue match) is set, then under
1758		 * specific conditions traffic is replicated to the given qp
1759		 * without being stolen from its normal destination.
1760		 */
1761		if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
1762			err = mlx4_ib_add_dont_trap_rule(dev,
1763							 flow_attr,
1764							 type);
1765			if (err)
1766				goto err_free;
1767		} else {
1768			type[0] = MLX4_FS_REGULAR;
1769		}
1770		break;
1771
1772	case IB_FLOW_ATTR_ALL_DEFAULT:
1773		type[0] = MLX4_FS_ALL_DEFAULT;
1774		break;
1775
1776	case IB_FLOW_ATTR_MC_DEFAULT:
1777		type[0] = MLX4_FS_MC_DEFAULT;
1778		break;
1779
1780	case IB_FLOW_ATTR_SNIFFER:
1781		type[0] = MLX4_FS_MIRROR_RX_PORT;
1782		type[1] = MLX4_FS_MIRROR_SX_PORT;
1783		break;
1784
1785	default:
1786		err = -EINVAL;
1787		goto err_free;
1788	}
1789
1790	while (i < ARRAY_SIZE(type) && type[i]) {
1791		err = __mlx4_ib_create_flow(qp, flow_attr, domain, type[i],
1792					    &mflow->reg_id[i].id);
1793		if (err)
1794			goto err_create_flow;
1795		if (is_bonded) {
1796			/* Application always sees one port so the mirror rule
1797			 * must be on port #2
1798			 */
1799			flow_attr->port = 2;
1800			err = __mlx4_ib_create_flow(qp, flow_attr,
1801						    domain, type[j],
1802						    &mflow->reg_id[j].mirror);
1803			flow_attr->port = 1;
1804			if (err)
1805				goto err_create_flow;
1806			j++;
1807		}
1808
1809		i++;
1810	}
1811
1812	if (i < ARRAY_SIZE(type) && flow_attr->type == IB_FLOW_ATTR_NORMAL) {
1813		err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1814					       &mflow->reg_id[i].id);
1815		if (err)
1816			goto err_create_flow;
1817
1818		if (is_bonded) {
1819			flow_attr->port = 2;
1820			err = mlx4_ib_tunnel_steer_add(qp, flow_attr,
1821						       &mflow->reg_id[j].mirror);
1822			flow_attr->port = 1;
1823			if (err)
1824				goto err_create_flow;
1825			j++;
1826		}
1827		/* function to create mirror rule */
1828		i++;
1829	}
1830
1831	return &mflow->ibflow;
1832
1833err_create_flow:
1834	while (i) {
1835		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1836					     mflow->reg_id[i].id);
1837		i--;
1838	}
1839
1840	while (j) {
1841		(void)__mlx4_ib_destroy_flow(to_mdev(qp->device)->dev,
1842					     mflow->reg_id[j].mirror);
1843		j--;
1844	}
1845err_free:
1846	kfree(mflow);
1847	return ERR_PTR(err);
1848}
1849
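/*
 * Destroy all hardware rules recorded for this flow (both the primary and,
 * if present, the bonded mirror registrations) and free the wrapper.  The
 * last error encountered, if any, is returned.
 */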
1850static int mlx4_ib_destroy_flow(struct ib_flow *flow_id)
1851{
1852	int err, ret = 0;
1853	int i = 0;
1854	struct mlx4_ib_dev *mdev = to_mdev(flow_id->qp->device);
1855	struct mlx4_ib_flow *mflow = to_mflow(flow_id);
1856
1857	while (i < ARRAY_SIZE(mflow->reg_id) && mflow->reg_id[i].id) {
1858		err = __mlx4_ib_destroy_flow(mdev->dev, mflow->reg_id[i].id);
1859		if (err)
1860			ret = err;
1861		if (mflow->reg_id[i].mirror) {
1862			err = __mlx4_ib_destroy_flow(mdev->dev,
1863						     mflow->reg_id[i].mirror);
1864			if (err)
1865				ret = err;
1866		}
1867		i++;
1868	}
1869
1870	kfree(mflow);
1871	return ret;
1872}
1873
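/*
 * Attach the QP to the multicast group identified by 'gid'.  With
 * device-managed flow steering the returned registration id is saved on the
 * QP's steering_rules list so the later detach can find it; with bonded
 * ports the group is attached on the other port as well (mirror id).
 */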
1874static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1875{
1876	int err;
1877	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1878	struct mlx4_dev	*dev = mdev->dev;
1879	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1880	struct mlx4_ib_steering *ib_steering = NULL;
1881	enum mlx4_protocol prot = MLX4_PROT_IB_IPV6;
1882	struct mlx4_flow_reg_id	reg_id;
1883
1884	if (mdev->dev->caps.steering_mode ==
1885	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
1886		ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
1887		if (!ib_steering)
1888			return -ENOMEM;
1889	}
1890
1891	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
1892				    !!(mqp->flags &
1893				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1894				    prot, &reg_id.id);
1895	if (err) {
1896		pr_err("multicast attach op failed, err %d\n", err);
1897		goto err_malloc;
1898	}
1899
1900	reg_id.mirror = 0;
1901	if (mlx4_is_bonded(dev)) {
1902		err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
1903					    (mqp->port == 1) ? 2 : 1,
1904					    !!(mqp->flags &
1905					    MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
1906					    prot, &reg_id.mirror);
1907		if (err)
1908			goto err_add;
1909	}
1910
1911	err = add_gid_entry(ibqp, gid);
1912	if (err)
1913		goto err_add;
1914
1915	if (ib_steering) {
1916		memcpy(ib_steering->gid.raw, gid->raw, 16);
1917		ib_steering->reg_id = reg_id;
1918		mutex_lock(&mqp->mutex);
1919		list_add(&ib_steering->list, &mqp->steering_rules);
1920		mutex_unlock(&mqp->mutex);
1921	}
1922	return 0;
1923
1924err_add:
1925	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1926			      prot, reg_id.id);
1927	if (reg_id.mirror)
1928		mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1929				      prot, reg_id.mirror);
1930err_malloc:
1931	kfree(ib_steering);
1932
1933	return err;
1934}
1935
1936static struct mlx4_ib_gid_entry *find_gid_entry(struct mlx4_ib_qp *qp, u8 *raw)
1937{
1938	struct mlx4_ib_gid_entry *ge;
1939	struct mlx4_ib_gid_entry *tmp;
1940	struct mlx4_ib_gid_entry *ret = NULL;
1941
1942	list_for_each_entry_safe(ge, tmp, &qp->gid_list, list) {
1943		if (!memcmp(raw, ge->gid.raw, 16)) {
1944			ret = ge;
1945			break;
1946		}
1947	}
1948
1949	return ret;
1950}
1951
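/*
 * Reverse of mlx4_ib_mcg_attach: look up the saved registration id (under
 * device-managed steering), detach the group from the port (and from the
 * other port when bonded) and drop the cached GID entry for this QP.
 */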
1952static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
1953{
1954	int err;
1955	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
1956	struct mlx4_dev *dev = mdev->dev;
1957	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
1958	struct net_device *ndev;
1959	struct mlx4_ib_gid_entry *ge;
1960	struct mlx4_flow_reg_id reg_id = {0, 0};
1961	enum mlx4_protocol prot =  MLX4_PROT_IB_IPV6;
1962
1963	if (mdev->dev->caps.steering_mode ==
1964	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
1965		struct mlx4_ib_steering *ib_steering;
1966
1967		mutex_lock(&mqp->mutex);
1968		list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
1969			if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
1970				list_del(&ib_steering->list);
1971				break;
1972			}
1973		}
1974		mutex_unlock(&mqp->mutex);
1975		if (&ib_steering->list == &mqp->steering_rules) {
1976			pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
1977			return -EINVAL;
1978		}
1979		reg_id = ib_steering->reg_id;
1980		kfree(ib_steering);
1981	}
1982
1983	err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1984				    prot, reg_id.id);
1985	if (err)
1986		return err;
1987
1988	if (mlx4_is_bonded(dev)) {
1989		err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
1990					    prot, reg_id.mirror);
1991		if (err)
1992			return err;
1993	}
1994
1995	mutex_lock(&mqp->mutex);
1996	ge = find_gid_entry(mqp, gid->raw);
1997	if (ge) {
1998		spin_lock_bh(&mdev->iboe.lock);
1999		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
2000		if (ndev)
2001			dev_hold(ndev);
2002		spin_unlock_bh(&mdev->iboe.lock);
2003		if (ndev)
2004			dev_put(ndev);
2005		list_del(&ge->list);
2006		kfree(ge);
2007	} else
2008		pr_warn("could not find mgid entry\n");
2009
2010	mutex_unlock(&mqp->mutex);
2011
2012	return 0;
2013}
2014
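/*
 * Query the NodeDescription and NodeInfo attributes through MAD_IFC and use
 * them to fill in the node description, node GUID and hardware revision id.
 */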
2015static int init_node_data(struct mlx4_ib_dev *dev)
2016{
2017	struct ib_smp *in_mad  = NULL;
2018	struct ib_smp *out_mad = NULL;
2019	int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
2020	int err = -ENOMEM;
2021
2022	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
2023	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
2024	if (!in_mad || !out_mad)
2025		goto out;
2026
2027	init_query_mad(in_mad);
2028	in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
2029	if (mlx4_is_master(dev->dev))
2030		mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
2031
2032	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2033	if (err)
2034		goto out;
2035
2036	memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX);
2037
2038	in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
2039
2040	err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad);
2041	if (err)
2042		goto out;
2043
2044	dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
2045	memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
2046
2047out:
2048	kfree(in_mad);
2049	kfree(out_mad);
2050	return err;
2051}
2052
2053static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2054			char *buf)
2055{
2056	struct mlx4_ib_dev *dev =
2057		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2058	return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
2059}
2060
2061static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2062			char *buf)
2063{
2064	struct mlx4_ib_dev *dev =
2065		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2066	return sprintf(buf, "%x\n", dev->dev->rev_id);
2067}
2068
2069static ssize_t show_board(struct device *device, struct device_attribute *attr,
2070			  char *buf)
2071{
2072	struct mlx4_ib_dev *dev =
2073		container_of(device, struct mlx4_ib_dev, ib_dev.dev);
2074	return sprintf(buf, "%.*s\n", MLX4_BOARD_ID_LEN,
2075		       dev->dev->board_id);
2076}
2077
2078static DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
2079static DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
2080static DEVICE_ATTR(board_id, S_IRUGO, show_board,  NULL);
2081
2082static struct device_attribute *mlx4_class_attributes[] = {
2083	&dev_attr_hw_rev,
2084	&dev_attr_hca_type,
2085	&dev_attr_board_id
2086};
2087
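/*
 * Diagnostic counters exported through the rdma hw_stats interface.  Each
 * entry names a counter and gives its offset within the buffer returned by
 * the QUERY_TRANSPORT_CI_ERRORS diagnostic query.
 */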
2088struct diag_counter {
2089	const char *name;
2090	u32 offset;
2091};
2092
2093#define DIAG_COUNTER(_name, _offset)			\
2094	{ .name = #_name, .offset = _offset }
2095
2096static const struct diag_counter diag_basic[] = {
2097	DIAG_COUNTER(rq_num_lle, 0x00),
2098	DIAG_COUNTER(sq_num_lle, 0x04),
2099	DIAG_COUNTER(rq_num_lqpoe, 0x08),
2100	DIAG_COUNTER(sq_num_lqpoe, 0x0C),
2101	DIAG_COUNTER(rq_num_lpe, 0x18),
2102	DIAG_COUNTER(sq_num_lpe, 0x1C),
2103	DIAG_COUNTER(rq_num_wrfe, 0x20),
2104	DIAG_COUNTER(sq_num_wrfe, 0x24),
2105	DIAG_COUNTER(sq_num_mwbe, 0x2C),
2106	DIAG_COUNTER(sq_num_bre, 0x34),
2107	DIAG_COUNTER(sq_num_rire, 0x44),
2108	DIAG_COUNTER(rq_num_rire, 0x48),
2109	DIAG_COUNTER(sq_num_rae, 0x4C),
2110	DIAG_COUNTER(rq_num_rae, 0x50),
2111	DIAG_COUNTER(sq_num_roe, 0x54),
2112	DIAG_COUNTER(sq_num_tree, 0x5C),
2113	DIAG_COUNTER(sq_num_rree, 0x64),
2114	DIAG_COUNTER(rq_num_rnr, 0x68),
2115	DIAG_COUNTER(sq_num_rnr, 0x6C),
2116	DIAG_COUNTER(rq_num_oos, 0x100),
2117	DIAG_COUNTER(sq_num_oos, 0x104),
2118};
2119
2120static const struct diag_counter diag_ext[] = {
2121	DIAG_COUNTER(rq_num_dup, 0x130),
2122	DIAG_COUNTER(sq_num_to, 0x134),
2123};
2124
2125static const struct diag_counter diag_device_only[] = {
2126	DIAG_COUNTER(num_cqovf, 0x1A0),
2127	DIAG_COUNTER(rq_num_udsdprd, 0x118),
2128};
2129
2130static struct rdma_hw_stats *mlx4_ib_alloc_hw_stats(struct ib_device *ibdev,
2131						    u8 port_num)
2132{
2133	struct mlx4_ib_dev *dev = to_mdev(ibdev);
2134	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2135
2136	if (!diag[!!port_num].name)
2137		return NULL;
2138
2139	return rdma_alloc_hw_stats_struct(diag[!!port_num].name,
2140					  diag[!!port_num].num_counters,
2141					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
2142}
2143
2144static int mlx4_ib_get_hw_stats(struct ib_device *ibdev,
2145				struct rdma_hw_stats *stats,
2146				u8 port, int index)
2147{
2148	struct mlx4_ib_dev *dev = to_mdev(ibdev);
2149	struct mlx4_ib_diag_counters *diag = dev->diag_counters;
2150	u32 hw_value[ARRAY_SIZE(diag_device_only) +
2151		ARRAY_SIZE(diag_ext) + ARRAY_SIZE(diag_basic)] = {};
2152	int ret;
2153	int i;
2154
2155	ret = mlx4_query_diag_counters(dev->dev,
2156				       MLX4_OP_MOD_QUERY_TRANSPORT_CI_ERRORS,
2157				       diag[!!port].offset, hw_value,
2158				       diag[!!port].num_counters, port);
2159
2160	if (ret)
2161		return ret;
2162
2163	for (i = 0; i < diag[!!port].num_counters; i++)
2164		stats->value[i] = hw_value[i];
2165
2166	return diag[!!port].num_counters;
2167}
2168
2169static int __mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev,
2170					 const char ***name,
2171					 u32 **offset,
2172					 u32 *num,
2173					 bool port)
2174{
2175	u32 num_counters;
2176
2177	num_counters = ARRAY_SIZE(diag_basic);
2178
2179	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT)
2180		num_counters += ARRAY_SIZE(diag_ext);
2181
2182	if (!port)
2183		num_counters += ARRAY_SIZE(diag_device_only);
2184
2185	*name = kcalloc(num_counters, sizeof(**name), GFP_KERNEL);
2186	if (!*name)
2187		return -ENOMEM;
2188
2189	*offset = kcalloc(num_counters, sizeof(**offset), GFP_KERNEL);
2190	if (!*offset)
2191		goto err_name;
2192
2193	*num = num_counters;
2194
2195	return 0;
2196
2197err_name:
2198	kfree(*name);
2199	return -ENOMEM;
2200}
2201
2202static void mlx4_ib_fill_diag_counters(struct mlx4_ib_dev *ibdev,
2203				       const char **name,
2204				       u32 *offset,
2205				       bool port)
2206{
2207	int i;
2208	int j;
2209
2210	for (i = 0, j = 0; i < ARRAY_SIZE(diag_basic); i++, j++) {
2211		name[i] = diag_basic[i].name;
2212		offset[i] = diag_basic[i].offset;
2213	}
2214
2215	if (ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT) {
2216		for (i = 0; i < ARRAY_SIZE(diag_ext); i++, j++) {
2217			name[j] = diag_ext[i].name;
2218			offset[j] = diag_ext[i].offset;
2219		}
2220	}
2221
2222	if (!port) {
2223		for (i = 0; i < ARRAY_SIZE(diag_device_only); i++, j++) {
2224			name[j] = diag_device_only[i].name;
2225			offset[j] = diag_device_only[i].offset;
2226		}
2227	}
2228}
2229
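/*
 * Build the device-wide (and, when the firmware supports per-port counters,
 * the per-port) name/offset tables and hook up the hw_stats callbacks.
 * Slave functions do not expose diagnostic counters and return early.
 */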
2230static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev)
2231{
2232	struct mlx4_ib_diag_counters *diag = ibdev->diag_counters;
2233	int i;
2234	int ret;
2235	bool per_port = !!(ibdev->dev->caps.flags2 &
2236		MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT);
2237
2238	if (mlx4_is_slave(ibdev->dev))
2239		return 0;
2240
2241	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2242		/* i == 1 means we are building port counters */
2243		if (i && !per_port)
2244			continue;
2245
2246		ret = __mlx4_ib_alloc_diag_counters(ibdev, &diag[i].name,
2247						    &diag[i].offset,
2248						    &diag[i].num_counters, i);
2249		if (ret)
2250			goto err_alloc;
2251
2252		mlx4_ib_fill_diag_counters(ibdev, diag[i].name,
2253					   diag[i].offset, i);
2254	}
2255
2256	ibdev->ib_dev.get_hw_stats	= mlx4_ib_get_hw_stats;
2257	ibdev->ib_dev.alloc_hw_stats	= mlx4_ib_alloc_hw_stats;
2258
2259	return 0;
2260
2261err_alloc:
2262	if (i) {
2263		kfree(diag[i - 1].name);
2264		kfree(diag[i - 1].offset);
2265	}
2266
2267	return ret;
2268}
2269
2270static void mlx4_ib_diag_cleanup(struct mlx4_ib_dev *ibdev)
2271{
2272	int i;
2273
2274	for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) {
2275		kfree(ibdev->diag_counters[i].offset);
2276		kfree(ibdev->diag_counters[i].name);
2277	}
2278}
2279
2280#define MLX4_IB_INVALID_MAC	((u64)-1)
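/*
 * Called when the MAC address of the Ethernet net device backing 'port' may
 * have changed: cache the new MAC and, under SR-IOV, register it with the
 * device and point the proxy QP1 at the new SMAC index, releasing whatever
 * MAC was registered before.
 */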
2281static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
2282			       struct net_device *dev,
2283			       int port)
2284{
2285	u64 new_smac = 0;
2286	u64 release_mac = MLX4_IB_INVALID_MAC;
2287	struct mlx4_ib_qp *qp;
2288
2289	new_smac = mlx4_mac_to_u64(IF_LLADDR(dev));
2290
2291	atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
2292
2293	/* no need to update QP1 or register the MAC in non-SRIOV mode */
2294	if (!mlx4_is_mfunc(ibdev->dev))
2295		return;
2296
2297	mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
2298	qp = ibdev->qp1_proxy[port - 1];
2299	if (qp) {
2300		int new_smac_index;
2301		u64 old_smac;
2302		struct mlx4_update_qp_params update_params;
2303
2304		mutex_lock(&qp->mutex);
2305		old_smac = qp->pri.smac;
2306		if (new_smac == old_smac)
2307			goto unlock;
2308
2309		new_smac_index = mlx4_register_mac(ibdev->dev, port, new_smac);
2310
2311		if (new_smac_index < 0)
2312			goto unlock;
2313
2314		update_params.smac_index = new_smac_index;
2315		if (mlx4_update_qp(ibdev->dev, qp->mqp.qpn, MLX4_UPDATE_QP_SMAC,
2316				   &update_params)) {
2317			release_mac = new_smac;
2318			goto unlock;
2319		}
2320		/* if old port was zero, no mac was yet registered for this QP */
2321		if (qp->pri.smac_port)
2322			release_mac = old_smac;
2323		qp->pri.smac = new_smac;
2324		qp->pri.smac_port = port;
2325		qp->pri.smac_index = new_smac_index;
2326	}
2327
2328unlock:
2329	if (release_mac != MLX4_IB_INVALID_MAC)
2330		mlx4_unregister_mac(ibdev->dev, port, release_mac);
2331	if (qp)
2332		mutex_unlock(&qp->mutex);
2333	mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
2334}
2335
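/*
 * Refresh the cached per-port net devices and, if the event concerns one of
 * them (address change, register, up or change), update the cached MAC and
 * proxy QP1 for that port.
 */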
2336static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
2337				 struct net_device *dev,
2338				 unsigned long event)
2339
2340{
2341	struct mlx4_ib_iboe *iboe;
2342	int update_qps_port = -1;
2343	int port;
2344
2345	iboe = &ibdev->iboe;
2346
2347	spin_lock_bh(&iboe->lock);
2348	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
2349
2350		iboe->netdevs[port - 1] =
2351			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
2352
2353		if (dev == iboe->netdevs[port - 1] &&
2354		    (event == NETDEV_CHANGEADDR || event == NETDEV_REGISTER ||
2355		     event == NETDEV_UP || event == NETDEV_CHANGE))
2356			update_qps_port = port;
2357
2358	}
2359	spin_unlock_bh(&iboe->lock);
2360
2361	if (update_qps_port > 0)
2362		mlx4_ib_update_qps(ibdev, dev, update_qps_port);
2363}
2364
2365static int mlx4_ib_netdev_event(struct notifier_block *this,
2366				unsigned long event, void *ptr)
2367{
2368	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2369	struct mlx4_ib_dev *ibdev;
2370
2371	if (!net_eq(dev_net(dev), &init_net))
2372		return NOTIFY_DONE;
2373
2374	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
2375	mlx4_ib_scan_netdevs(ibdev, dev, event);
2376
2377	return NOTIFY_DONE;
2378}
2379
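/*
 * On the master function, program the virt-to-phys pkey mapping for every
 * slave and port (identity for the master, index 0 only for other slaves,
 * with the remaining entries pointing at the last table entry) and seed the
 * physical pkey cache with the default pkey in slot 0.
 */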
2380static void init_pkeys(struct mlx4_ib_dev *ibdev)
2381{
2382	int port;
2383	int slave;
2384	int i;
2385
2386	if (mlx4_is_master(ibdev->dev)) {
2387		for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
2388		     ++slave) {
2389			for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2390				for (i = 0;
2391				     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2392				     ++i) {
2393					ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
2394					/* master has the identity virt2phys pkey mapping */
2395						(slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
2396							ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
2397					mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
2398							     ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
2399				}
2400			}
2401		}
2402		/* initialize pkey cache */
2403		for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
2404			for (i = 0;
2405			     i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
2406			     ++i)
2407				ibdev->pkeys.phys_pkey_cache[port-1][i] =
2408					(i) ? 0 : 0xFFFF;
2409		}
2410	}
2411}
2412
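/*
 * Try to assign dedicated completion EQs out of the device's pool, one set
 * per port, and advertise the number actually obtained through
 * ib_dev.num_comp_vectors.  Unused table slots are marked with -1.
 */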
2413static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2414{
2415	int i, j, eq = 0, total_eqs = 0;
2416
2417	ibdev->eq_table = kcalloc(dev->caps.num_comp_vectors,
2418				  sizeof(ibdev->eq_table[0]), GFP_KERNEL);
2419	if (!ibdev->eq_table)
2420		return;
2421
2422	for (i = 1; i <= dev->caps.num_ports; i++) {
2423		for (j = 0; j < mlx4_get_eqs_per_port(dev, i);
2424		     j++, total_eqs++) {
2425			if (i > 1 &&  mlx4_is_eq_shared(dev, total_eqs))
2426				continue;
2427			ibdev->eq_table[eq] = total_eqs;
2428			if (!mlx4_assign_eq(dev, i,
2429					    &ibdev->eq_table[eq]))
2430				eq++;
2431			else
2432				ibdev->eq_table[eq] = -1;
2433		}
2434	}
2435
2436	for (i = eq; i < dev->caps.num_comp_vectors;
2437	     ibdev->eq_table[i++] = -1)
2438		;
2439
2440	/* Advertise the new number of EQs to clients */
2441	ibdev->ib_dev.num_comp_vectors = eq;
2442}
2443
2444static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
2445{
2446	int i;
2447	int total_eqs = ibdev->ib_dev.num_comp_vectors;
2448
2449	/* no eqs were allocated */
2450	if (!ibdev->eq_table)
2451		return;
2452
2453	/* Reset the advertised EQ number */
2454	ibdev->ib_dev.num_comp_vectors = 0;
2455
2456	for (i = 0; i < total_eqs; i++)
2457		mlx4_release_eq(dev, ibdev->eq_table[i]);
2458
2459	kfree(ibdev->eq_table);
2460	ibdev->eq_table = NULL;
2461}
2462
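/*
 * Report the immutable attributes of a port: pkey/GID table sizes, the core
 * capability flags derived from the link layer (IB, RoCE v1, or RoCE v1/v2)
 * and the maximum MAD size.
 */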
2463static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
2464			       struct ib_port_immutable *immutable)
2465{
2466	struct ib_port_attr attr;
2467	struct mlx4_ib_dev *mdev = to_mdev(ibdev);
2468	int err;
2469
2470	err = mlx4_ib_query_port(ibdev, port_num, &attr);
2471	if (err)
2472		return err;
2473
2474	immutable->pkey_tbl_len = attr.pkey_tbl_len;
2475	immutable->gid_tbl_len = attr.gid_tbl_len;
2476
2477	if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
2478		immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2479	} else {
2480		if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
2481			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
2482		if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
2483			immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
2484				RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
2485	}
2486
2487	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2488
2489	return 0;
2490}
2491
2492static void get_fw_ver_str(struct ib_device *device, char *str,
2493			   size_t str_len)
2494{
2495	struct mlx4_ib_dev *dev =
2496		container_of(device, struct mlx4_ib_dev, ib_dev);
2497	snprintf(str, str_len, "%d.%d.%d",
2498		 (int) (dev->dev->caps.fw_ver >> 32),
2499		 (int) (dev->dev->caps.fw_ver >> 16) & 0xffff,
2500		 (int) dev->dev->caps.fw_ver & 0xffff);
2501}
2502
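/*
 * mlx4 core "add" callback: allocate and populate the ib_device (PD, UAR,
 * verbs entry points, command masks), set up EQs, node data, per-port
 * counters, the flow steering QP range and diagnostic counters, register the
 * device with the IB core, initialize MAD handling and SR-IOV support, hook
 * the netdev notifier for RoCE ports and create the sysfs attributes.  On
 * any failure the error path unwinds in reverse order and returns NULL.
 */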
2503static void *mlx4_ib_add(struct mlx4_dev *dev)
2504{
2505	struct mlx4_ib_dev *ibdev;
2506	int num_ports;
2507	int i, j;
2508	int err;
2509	struct mlx4_ib_iboe *iboe;
2510	int ib_num_ports = 0;
2511	int num_req_counters;
2512	int allocated;
2513	u32 counter_index;
2514	struct counter_index *new_counter_index = NULL;
2515
2516	pr_info_once("%s", mlx4_ib_version);
2517
2518	num_ports = 0;
2519	mlx4_foreach_ib_transport_port(i, dev)
2520		num_ports++;
2521
2522	/* No point in registering a device with no ports... */
2523	if (num_ports == 0)
2524		return NULL;
2525
2526	ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
2527	if (!ibdev) {
2528		dev_err(&dev->persist->pdev->dev,
2529			"Device struct alloc failed\n");
2530		return NULL;
2531	}
2532
2533	iboe = &ibdev->iboe;
2534
2535	if (mlx4_pd_alloc(dev, &ibdev->priv_pdn))
2536		goto err_dealloc;
2537
2538	if (mlx4_uar_alloc(dev, &ibdev->priv_uar))
2539		goto err_pd;
2540
2541	ibdev->uar_map = ioremap((phys_addr_t) ibdev->priv_uar.pfn << PAGE_SHIFT,
2542				 PAGE_SIZE);
2543	if (!ibdev->uar_map)
2544		goto err_uar;
2545	MLX4_INIT_DOORBELL_LOCK(&ibdev->uar_lock);
2546
2547	ibdev->dev = dev;
2548	ibdev->bond_next_port	= 0;
2549
2550	strlcpy(ibdev->ib_dev.name, "mlx4_%d", IB_DEVICE_NAME_MAX);
2551	ibdev->ib_dev.owner		= THIS_MODULE;
2552	ibdev->ib_dev.node_type		= RDMA_NODE_IB_CA;
2553	ibdev->ib_dev.local_dma_lkey	= dev->caps.reserved_lkey;
2554	ibdev->num_ports		= num_ports;
2555	ibdev->ib_dev.phys_port_cnt     = mlx4_is_bonded(dev) ?
2556						1 : ibdev->num_ports;
2557	ibdev->ib_dev.num_comp_vectors	= dev->caps.num_comp_vectors;
2558	ibdev->ib_dev.dma_device	= &dev->persist->pdev->dev;
2559	ibdev->ib_dev.get_netdev	= mlx4_ib_get_netdev;
2560	ibdev->ib_dev.add_gid		= mlx4_ib_add_gid;
2561	ibdev->ib_dev.del_gid		= mlx4_ib_del_gid;
2562
2563	if (dev->caps.userspace_caps)
2564		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
2565	else
2566		ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
2567
2568	ibdev->ib_dev.uverbs_cmd_mask	=
2569		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
2570		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
2571		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
2572		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
2573		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
2574		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
2575		(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
2576		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
2577		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
2578		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
2579		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
2580		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
2581		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
2582		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
2583		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
2584		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
2585		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
2586		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
2587		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
2588		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
2589		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
2590		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
2591		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
2592		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
2593
2594	ibdev->ib_dev.query_device	= mlx4_ib_query_device;
2595	ibdev->ib_dev.query_port	= mlx4_ib_query_port;
2596	ibdev->ib_dev.get_link_layer	= mlx4_ib_port_link_layer;
2597	ibdev->ib_dev.query_gid		= mlx4_ib_query_gid;
2598	ibdev->ib_dev.query_pkey	= mlx4_ib_query_pkey;
2599	ibdev->ib_dev.modify_device	= mlx4_ib_modify_device;
2600	ibdev->ib_dev.modify_port	= mlx4_ib_modify_port;
2601	ibdev->ib_dev.alloc_ucontext	= mlx4_ib_alloc_ucontext;
2602	ibdev->ib_dev.dealloc_ucontext	= mlx4_ib_dealloc_ucontext;
2603	ibdev->ib_dev.mmap		= mlx4_ib_mmap;
2604	ibdev->ib_dev.alloc_pd		= mlx4_ib_alloc_pd;
2605	ibdev->ib_dev.dealloc_pd	= mlx4_ib_dealloc_pd;
2606	ibdev->ib_dev.create_ah		= mlx4_ib_create_ah;
2607	ibdev->ib_dev.query_ah		= mlx4_ib_query_ah;
2608	ibdev->ib_dev.destroy_ah	= mlx4_ib_destroy_ah;
2609	ibdev->ib_dev.create_srq	= mlx4_ib_create_srq;
2610	ibdev->ib_dev.modify_srq	= mlx4_ib_modify_srq;
2611	ibdev->ib_dev.query_srq		= mlx4_ib_query_srq;
2612	ibdev->ib_dev.destroy_srq	= mlx4_ib_destroy_srq;
2613	ibdev->ib_dev.post_srq_recv	= mlx4_ib_post_srq_recv;
2614	ibdev->ib_dev.create_qp		= mlx4_ib_create_qp;
2615	ibdev->ib_dev.modify_qp		= mlx4_ib_modify_qp;
2616	ibdev->ib_dev.query_qp		= mlx4_ib_query_qp;
2617	ibdev->ib_dev.destroy_qp	= mlx4_ib_destroy_qp;
2618	ibdev->ib_dev.post_send		= mlx4_ib_post_send;
2619	ibdev->ib_dev.post_recv		= mlx4_ib_post_recv;
2620	ibdev->ib_dev.create_cq		= mlx4_ib_create_cq;
2621	ibdev->ib_dev.modify_cq		= mlx4_ib_modify_cq;
2622	ibdev->ib_dev.resize_cq		= mlx4_ib_resize_cq;
2623	ibdev->ib_dev.destroy_cq	= mlx4_ib_destroy_cq;
2624	ibdev->ib_dev.poll_cq		= mlx4_ib_poll_cq;
2625	ibdev->ib_dev.req_notify_cq	= mlx4_ib_arm_cq;
2626	ibdev->ib_dev.get_dma_mr	= mlx4_ib_get_dma_mr;
2627	ibdev->ib_dev.reg_user_mr	= mlx4_ib_reg_user_mr;
2628	ibdev->ib_dev.rereg_user_mr	= mlx4_ib_rereg_user_mr;
2629	ibdev->ib_dev.reg_phys_mr	= mlx4_ib_reg_phys_mr;
2630	ibdev->ib_dev.dereg_mr		= mlx4_ib_dereg_mr;
2631	ibdev->ib_dev.alloc_mr		= mlx4_ib_alloc_mr;
2632	ibdev->ib_dev.map_mr_sg		= mlx4_ib_map_mr_sg;
2633	ibdev->ib_dev.attach_mcast	= mlx4_ib_mcg_attach;
2634	ibdev->ib_dev.detach_mcast	= mlx4_ib_mcg_detach;
2635	ibdev->ib_dev.process_mad	= mlx4_ib_process_mad;
2636	ibdev->ib_dev.get_port_immutable = mlx4_port_immutable;
2637	ibdev->ib_dev.get_dev_fw_str    = get_fw_ver_str;
2638
2639	if (!mlx4_is_slave(ibdev->dev)) {
2640		ibdev->ib_dev.alloc_fmr		= mlx4_ib_fmr_alloc;
2641		ibdev->ib_dev.map_phys_fmr	= mlx4_ib_map_phys_fmr;
2642		ibdev->ib_dev.unmap_fmr		= mlx4_ib_unmap_fmr;
2643		ibdev->ib_dev.dealloc_fmr	= mlx4_ib_fmr_dealloc;
2644	}
2645
2646	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_MEM_WINDOW ||
2647	    dev->caps.bmme_flags & MLX4_BMME_FLAG_TYPE_2_WIN) {
2648		ibdev->ib_dev.alloc_mw = mlx4_ib_alloc_mw;
2649		ibdev->ib_dev.dealloc_mw = mlx4_ib_dealloc_mw;
2650
2651		ibdev->ib_dev.uverbs_cmd_mask |=
2652			(1ull << IB_USER_VERBS_CMD_ALLOC_MW) |
2653			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
2654	}
2655
2656	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
2657		ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
2658		ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
2659		ibdev->ib_dev.uverbs_cmd_mask |=
2660			(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
2661			(1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
2662	}
2663
2664	if (check_flow_steering_support(dev)) {
2665		ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
2666		ibdev->ib_dev.create_flow	= mlx4_ib_create_flow;
2667		ibdev->ib_dev.destroy_flow	= mlx4_ib_destroy_flow;
2668
2669		ibdev->ib_dev.uverbs_ex_cmd_mask	|=
2670			(1ull << IB_USER_VERBS_EX_CMD_CREATE_FLOW) |
2671			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_FLOW);
2672	}
2673
2674	ibdev->ib_dev.uverbs_ex_cmd_mask |=
2675		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
2676		(1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
2677		(1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
2678
2679	mlx4_ib_alloc_eqs(dev, ibdev);
2680
2681	spin_lock_init(&iboe->lock);
2682
2683	if (init_node_data(ibdev))
2684		goto err_map;
2685	mlx4_init_sl2vl_tbl(ibdev);
2686
2687	for (i = 0; i < ibdev->num_ports; ++i) {
2688		mutex_init(&ibdev->counters_table[i].mutex);
2689		INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
2690	}
2691
2692	num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
2693	for (i = 0; i < num_req_counters; ++i) {
2694		mutex_init(&ibdev->qp1_proxy_lock[i]);
2695		allocated = 0;
2696		if (mlx4_ib_port_link_layer(&ibdev->ib_dev, i + 1) ==
2697						IB_LINK_LAYER_ETHERNET) {
2698			err = mlx4_counter_alloc(ibdev->dev, &counter_index);
2699			/* if allocating a new counter failed, fall back to the default */
2700			if (err)
2701				counter_index =
2702					mlx4_get_default_counter_index(dev,
2703								       i + 1);
2704			else
2705				allocated = 1;
2706		} else { /* IB_LINK_LAYER_INFINIBAND uses the default counter */
2707			counter_index = mlx4_get_default_counter_index(dev,
2708								       i + 1);
2709		}
2710		new_counter_index = kmalloc(sizeof(*new_counter_index),
2711					    GFP_KERNEL);
2712		if (!new_counter_index) {
2713			if (allocated)
2714				mlx4_counter_free(ibdev->dev, counter_index);
2715			goto err_counter;
2716		}
2717		new_counter_index->index = counter_index;
2718		new_counter_index->allocated = allocated;
2719		list_add_tail(&new_counter_index->list,
2720			      &ibdev->counters_table[i].counters_list);
2721		ibdev->counters_table[i].default_counter = counter_index;
2722		pr_info("counter index %d for port %d allocated %d\n",
2723			counter_index, i + 1, allocated);
2724	}
2725	if (mlx4_is_bonded(dev))
2726		for (i = 1; i < ibdev->num_ports ; ++i) {
2727			new_counter_index =
2728					kmalloc(sizeof(struct counter_index),
2729						GFP_KERNEL);
2730			if (!new_counter_index)
2731				goto err_counter;
2732			new_counter_index->index = counter_index;
2733			new_counter_index->allocated = 0;
2734			list_add_tail(&new_counter_index->list,
2735				      &ibdev->counters_table[i].counters_list);
2736			ibdev->counters_table[i].default_counter =
2737								counter_index;
2738		}
2739
2740	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
2741		ib_num_ports++;
2742
2743	spin_lock_init(&ibdev->sm_lock);
2744	mutex_init(&ibdev->cap_mask_mutex);
2745	INIT_LIST_HEAD(&ibdev->qp_list);
2746	spin_lock_init(&ibdev->reset_flow_resource_lock);
2747
2748	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED &&
2749	    ib_num_ports) {
2750		ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
2751		err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
2752					    MLX4_IB_UC_STEER_QPN_ALIGN,
2753					    &ibdev->steer_qpn_base, 0);
2754		if (err)
2755			goto err_counter;
2756
2757		ibdev->ib_uc_qpns_bitmap =
2758			kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
2759				sizeof(long),
2760				GFP_KERNEL);
2761		if (!ibdev->ib_uc_qpns_bitmap) {
2762			dev_err(&dev->persist->pdev->dev,
2763				"bit map alloc failed\n");
2764			goto err_steer_qp_release;
2765		}
2766
2767		bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
2768
2769		err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
2770				dev, ibdev->steer_qpn_base,
2771				ibdev->steer_qpn_base +
2772				ibdev->steer_qpn_count - 1);
2773		if (err)
2774			goto err_steer_free_bitmap;
2775	}
2776
2777	for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
2778		atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
2779
2780	if (mlx4_ib_alloc_diag_counters(ibdev))
2781		goto err_steer_free_bitmap;
2782
2783	if (ib_register_device(&ibdev->ib_dev, NULL))
2784		goto err_diag_counters;
2785
2786	if (mlx4_ib_mad_init(ibdev))
2787		goto err_reg;
2788
2789	if (mlx4_ib_init_sriov(ibdev))
2790		goto err_mad;
2791
2792	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE ||
2793	    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2794		if (!iboe->nb.notifier_call) {
2795			iboe->nb.notifier_call = mlx4_ib_netdev_event;
2796			err = register_netdevice_notifier(&iboe->nb);
2797			if (err) {
2798				iboe->nb.notifier_call = NULL;
2799				goto err_notif;
2800			}
2801		}
2802		if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2) {
2803			err = mlx4_config_roce_v2_port(dev, ROCE_V2_UDP_DPORT);
2804			if (err) {
2805				goto err_notif;
2806			}
2807		}
2808	}
2809
2810	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
2811		if (device_create_file(&ibdev->ib_dev.dev,
2812				       mlx4_class_attributes[j]))
2813			goto err_notif;
2814	}
2815
2816	ibdev->ib_active = true;
2817
2818	if (mlx4_is_mfunc(ibdev->dev))
2819		init_pkeys(ibdev);
2820
2821	/* create paravirt contexts for any VFs which are active */
2822	if (mlx4_is_master(ibdev->dev)) {
2823		for (j = 0; j < MLX4_MFUNC_MAX; j++) {
2824			if (j == mlx4_master_func_num(ibdev->dev))
2825				continue;
2826			if (mlx4_is_slave_active(ibdev->dev, j))
2827				do_slave_init(ibdev, j, 1);
2828		}
2829	}
2830	return ibdev;
2831
2832err_notif:
2833	if (ibdev->iboe.nb.notifier_call) {
2834		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2835			pr_warn("failure unregistering notifier\n");
2836		ibdev->iboe.nb.notifier_call = NULL;
2837	}
2838	flush_workqueue(wq);
2839
2840	mlx4_ib_close_sriov(ibdev);
2841
2842err_mad:
2843	mlx4_ib_mad_cleanup(ibdev);
2844
2845err_reg:
2846	ib_unregister_device(&ibdev->ib_dev);
2847
2848err_diag_counters:
2849	mlx4_ib_diag_cleanup(ibdev);
2850
2851err_steer_free_bitmap:
2852	kfree(ibdev->ib_uc_qpns_bitmap);
2853
2854err_steer_qp_release:
2855	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
2856		mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2857				      ibdev->steer_qpn_count);
2858err_counter:
2859	for (i = 0; i < ibdev->num_ports; ++i)
2860		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
2861
2862err_map:
2863	iounmap(ibdev->uar_map);
2864
2865err_uar:
2866	mlx4_uar_free(dev, &ibdev->priv_uar);
2867
2868err_pd:
2869	mlx4_pd_free(dev, ibdev->priv_pdn);
2870
2871err_dealloc:
2872	ib_dealloc_device(&ibdev->ib_dev);
2873
2874	return NULL;
2875}
2876
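/*
 * Reserve 'count' QPNs (rounded up to a power of two) from the range set
 * aside for userspace flow steering and return the first one through *qpn.
 */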
2877int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
2878{
2879	int offset;
2880
2881	WARN_ON(!dev->ib_uc_qpns_bitmap);
2882
2883	offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
2884					 dev->steer_qpn_count,
2885					 get_count_order(count));
2886	if (offset < 0)
2887		return offset;
2888
2889	*qpn = dev->steer_qpn_base + offset;
2890	return 0;
2891}
2892
2893void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
2894{
2895	if (!qpn ||
2896	    dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
2897		return;
2898
2899	BUG_ON(qpn < dev->steer_qpn_base);
2900
2901	bitmap_release_region(dev->ib_uc_qpns_bitmap,
2902			      qpn - dev->steer_qpn_base,
2903			      get_count_order(count));
2904}
2905
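/*
 * Attach or detach the default IB L2 steering rule for a QP that uses the
 * reserved steering QPN range: on attach an empty IB flow spec is built and
 * installed as a regular rule; on detach the saved registration is removed.
 */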
2906int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
2907			 int is_attach)
2908{
2909	int err;
2910	size_t flow_size;
2911	struct ib_flow_attr *flow = NULL;
2912	struct ib_flow_spec_ib *ib_spec;
2913
2914	if (is_attach) {
2915		flow_size = sizeof(struct ib_flow_attr) +
2916			    sizeof(struct ib_flow_spec_ib);
2917		flow = kzalloc(flow_size, GFP_KERNEL);
2918		if (!flow)
2919			return -ENOMEM;
2920		flow->port = mqp->port;
2921		flow->num_of_specs = 1;
2922		flow->size = flow_size;
2923		ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
2924		ib_spec->type = IB_FLOW_SPEC_IB;
2925		ib_spec->size = sizeof(struct ib_flow_spec_ib);
2926		/* Add an empty rule for IB L2 */
2927		memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
2928
2929		err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
2930					    IB_FLOW_DOMAIN_NIC,
2931					    MLX4_FS_REGULAR,
2932					    &mqp->reg_id);
2933	} else {
2934		err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
2935	}
2936	kfree(flow);
2937	return err;
2938}
2939
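/*
 * mlx4 core "remove" callback: tear down everything set up in mlx4_ib_add in
 * reverse order (SR-IOV, MADs, IB registration, notifier, steering range,
 * counters, EQs, UAR and PD) and free the ib_device.
 */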
2940static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
2941{
2942	struct mlx4_ib_dev *ibdev = ibdev_ptr;
2943	int p;
2944
2945	ibdev->ib_active = false;
2946	flush_workqueue(wq);
2947
2948	mlx4_ib_close_sriov(ibdev);
2949	mlx4_ib_mad_cleanup(ibdev);
2950	ib_unregister_device(&ibdev->ib_dev);
2951	mlx4_ib_diag_cleanup(ibdev);
2952	if (ibdev->iboe.nb.notifier_call) {
2953		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
2954			pr_warn("failure unregistering notifier\n");
2955		ibdev->iboe.nb.notifier_call = NULL;
2956	}
2957
2958	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
2959		mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
2960				      ibdev->steer_qpn_count);
2961		kfree(ibdev->ib_uc_qpns_bitmap);
2962	}
2963
2964	iounmap(ibdev->uar_map);
2965	for (p = 0; p < ibdev->num_ports; ++p)
2966		mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
2967
2968	mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
2969		mlx4_CLOSE_PORT(dev, p);
2970
2971	mlx4_ib_free_eqs(dev, ibdev);
2972
2973	mlx4_uar_free(dev, &ibdev->priv_uar);
2974	mlx4_pd_free(dev, ibdev->priv_pdn);
2975	ib_dealloc_device(&ibdev->ib_dev);
2976}
2977
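/*
 * On the master, queue one work item per active port of the given slave to
 * set up (do_init != 0) or tear down its tunnel QPs.  The work is skipped
 * (and freed) if SR-IOV support is already going down.
 */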
2978static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
2979{
2980	struct mlx4_ib_demux_work **dm = NULL;
2981	struct mlx4_dev *dev = ibdev->dev;
2982	int i;
2983	unsigned long flags;
2984	struct mlx4_active_ports actv_ports;
2985	unsigned int ports;
2986	unsigned int first_port;
2987
2988	if (!mlx4_is_master(dev))
2989		return;
2990
2991	actv_ports = mlx4_get_active_ports(dev, slave);
2992	ports = bitmap_weight(actv_ports.ports, dev->caps.num_ports);
2993	first_port = find_first_bit(actv_ports.ports, dev->caps.num_ports);
2994
2995	dm = kcalloc(ports, sizeof(*dm), GFP_ATOMIC);
2996	if (!dm) {
2997		pr_err("failed to allocate memory for tunneling qp update\n");
2998		return;
2999	}
3000
3001	for (i = 0; i < ports; i++) {
3002		dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
3003		if (!dm[i]) {
3004			pr_err("failed to allocate memory for tunneling qp update work struct\n");
3005			while (--i >= 0)
3006				kfree(dm[i]);
3007			goto out;
3008		}
3009		INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
3010		dm[i]->port = first_port + i + 1;
3011		dm[i]->slave = slave;
3012		dm[i]->do_init = do_init;
3013		dm[i]->dev = ibdev;
3014	}
3015	/* initialize or tear down tunnel QPs for the slave */
3016	spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
3017	if (!ibdev->sriov.is_going_down) {
3018		for (i = 0; i < ports; i++)
3019			queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
3020		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3021	} else {
3022		spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
3023		for (i = 0; i < ports; i++)
3024			kfree(dm[i]);
3025	}
3026out:
3027	kfree(dm);
3028	return;
3029}
3030
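/*
 * On a catastrophic device error, scan every QP for outstanding send or
 * receive work and invoke the completion handler of each affected CQ once,
 * so consumers get a chance to observe the flushed completions.
 */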
3031static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev)
3032{
3033	struct mlx4_ib_qp *mqp;
3034	unsigned long flags_qp;
3035	unsigned long flags_cq;
3036	struct mlx4_ib_cq *send_mcq, *recv_mcq;
3037	struct list_head    cq_notify_list;
3038	struct mlx4_cq *mcq;
3039	unsigned long flags;
3040
3041	pr_warn("mlx4_ib_handle_catas_error was started\n");
3042	INIT_LIST_HEAD(&cq_notify_list);
3043
3044	/* Walk the QP list residing on this ibdev, synchronized with QP create/destroy. */
3045	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
3046
3047	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
3048		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
3049		if (mqp->sq.tail != mqp->sq.head) {
3050			send_mcq = to_mcq(mqp->ibqp.send_cq);
3051			spin_lock_irqsave(&send_mcq->lock, flags_cq);
3052			if (send_mcq->mcq.comp &&
3053			    mqp->ibqp.send_cq->comp_handler) {
3054				if (!send_mcq->mcq.reset_notify_added) {
3055					send_mcq->mcq.reset_notify_added = 1;
3056					list_add_tail(&send_mcq->mcq.reset_notify,
3057						      &cq_notify_list);
3058				}
3059			}
3060			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
3061		}
3062		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
3063		/* Now, handle the QP's receive queue */
3064		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
3065		/* no handling is needed for SRQ */
3066		if (!mqp->ibqp.srq) {
3067			if (mqp->rq.tail != mqp->rq.head) {
3068				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
3069				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
3070				if (recv_mcq->mcq.comp &&
3071				    mqp->ibqp.recv_cq->comp_handler) {
3072					if (!recv_mcq->mcq.reset_notify_added) {
3073						recv_mcq->mcq.reset_notify_added = 1;
3074						list_add_tail(&recv_mcq->mcq.reset_notify,
3075							      &cq_notify_list);
3076					}
3077				}
3078				spin_unlock_irqrestore(&recv_mcq->lock,
3079						       flags_cq);
3080			}
3081		}
3082		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
3083	}
3084
3085	list_for_each_entry(mcq, &cq_notify_list, reset_notify) {
3086		mcq->comp(mcq);
3087	}
3088	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
3089	pr_warn("mlx4_ib_handle_catas_error ended\n");
3090}
3091
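/*
 * Deferred work for bonded ports: compute an aggregate port state from the
 * carrier/running state of the member net devices and report it as a port
 * active/error event on the single port exposed to consumers.
 */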
3092static void handle_bonded_port_state_event(struct work_struct *work)
3093{
3094	struct ib_event_work *ew =
3095		container_of(work, struct ib_event_work, work);
3096	struct mlx4_ib_dev *ibdev = ew->ib_dev;
3097	enum ib_port_state bonded_port_state = IB_PORT_NOP;
3098	int i;
3099	struct ib_event ibev;
3100
3101	kfree(ew);
3102	spin_lock_bh(&ibdev->iboe.lock);
3103	for (i = 0; i < MLX4_MAX_PORTS; ++i) {
3104		struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
3105		enum ib_port_state curr_port_state;
3106
3107		if (!curr_netdev)
3108			continue;
3109
3110		curr_port_state =
3111			(netif_running(curr_netdev) &&
3112			 netif_carrier_ok(curr_netdev)) ?
3113			IB_PORT_ACTIVE : IB_PORT_DOWN;
3114
3115		bonded_port_state = (bonded_port_state != IB_PORT_ACTIVE) ?
3116			curr_port_state : IB_PORT_ACTIVE;
3117	}
3118	spin_unlock_bh(&ibdev->iboe.lock);
3119
3120	ibev.device = &ibdev->ib_dev;
3121	ibev.element.port_num = 1;
3122	ibev.event = (bonded_port_state == IB_PORT_ACTIVE) ?
3123		IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
3124
3125	ib_dispatch_event(&ibev);
3126}
3127
3128void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port)
3129{
3130	u64 sl2vl;
3131	int err;
3132
3133	err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl);
3134	if (err) {
3135		pr_err("Unable to get current sl to vl mapping for port %d.  Using all zeroes (%d)\n",
3136		       port, err);
3137		sl2vl = 0;
3138	}
3139	atomic64_set(&mdev->sl2vl[port - 1], sl2vl);
3140}
3141
3142static void ib_sl2vl_update_work(struct work_struct *work)
3143{
3144	struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
3145	struct mlx4_ib_dev *mdev = ew->ib_dev;
3146	int port = ew->port;
3147
3148	mlx4_ib_sl2vl_update(mdev, port);
3149
3150	kfree(ew);
3151}
3152
3153void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
3154				     int port)
3155{
3156	struct ib_event_work *ew;
3157
3158	ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3159	if (ew) {
3160		INIT_WORK(&ew->work, ib_sl2vl_update_work);
3161		ew->port = port;
3162		ew->ib_dev = ibdev;
3163		queue_work(wq, &ew->work);
3164	} else {
3165		pr_err("failed to allocate memory for sl2vl update work\n");
3166	}
3167}
3168
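/*
 * mlx4 core event dispatcher: translate core events (port up/down,
 * catastrophic error, port management change, slave init/shutdown) into IB
 * events dispatched to consumers or into deferred work on the driver's
 * workqueue.
 */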
3169static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
3170			  enum mlx4_dev_event event, unsigned long param)
3171{
3172	struct ib_event ibev;
3173	struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
3174	struct mlx4_eqe *eqe = NULL;
3175	struct ib_event_work *ew;
3176	int p = 0;
3177
3178	if (mlx4_is_bonded(dev) &&
3179	    ((event == MLX4_DEV_EVENT_PORT_UP) ||
3180	    (event == MLX4_DEV_EVENT_PORT_DOWN))) {
3181		ew = kmalloc(sizeof(*ew), GFP_ATOMIC);
3182		if (!ew)
3183			return;
3184		INIT_WORK(&ew->work, handle_bonded_port_state_event);
3185		ew->ib_dev = ibdev;
3186		queue_work(wq, &ew->work);
3187		return;
3188	}
3189
3190	if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE)
3191		eqe = (struct mlx4_eqe *)param;
3192	else
3193		p = (int) param;
3194
3195	switch (event) {
3196	case MLX4_DEV_EVENT_PORT_UP:
3197		if (p > ibdev->num_ports)
3198			return;
3199		if (!mlx4_is_slave(dev) &&
3200		    rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
3201			IB_LINK_LAYER_INFINIBAND) {
3202			if (mlx4_is_master(dev))
3203				mlx4_ib_invalidate_all_guid_record(ibdev, p);
3204			if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST &&
3205			    !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT))
3206				mlx4_sched_ib_sl2vl_update_work(ibdev, p);
3207		}
3208		ibev.event = IB_EVENT_PORT_ACTIVE;
3209		break;
3210
3211	case MLX4_DEV_EVENT_PORT_DOWN:
3212		if (p > ibdev->num_ports)
3213			return;
3214		ibev.event = IB_EVENT_PORT_ERR;
3215		break;
3216
3217	case MLX4_DEV_EVENT_CATASTROPHIC_ERROR:
3218		ibdev->ib_active = false;
3219		ibev.event = IB_EVENT_DEVICE_FATAL;
3220		mlx4_ib_handle_catas_error(ibdev);
3221		break;
3222
3223	case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
3224		ew = kmalloc(sizeof *ew, GFP_ATOMIC);
3225		if (!ew) {
3226			pr_err("failed to allocate memory for events work\n");
3227			break;
3228		}
3229
3230		INIT_WORK(&ew->work, handle_port_mgmt_change_event);
3231		memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
3232		ew->ib_dev = ibdev;
3233		/* need to queue only for port owner, which uses GEN_EQE */
3234		if (mlx4_is_master(dev))
3235			queue_work(wq, &ew->work);
3236		else
3237			handle_port_mgmt_change_event(&ew->work);
3238		return;
3239
3240	case MLX4_DEV_EVENT_SLAVE_INIT:
3241		/* here, p is the slave id */
3242		do_slave_init(ibdev, p, 1);
3243		if (mlx4_is_master(dev)) {
3244			int i;
3245
3246			for (i = 1; i <= ibdev->num_ports; i++) {
3247				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3248					== IB_LINK_LAYER_INFINIBAND)
3249					mlx4_ib_slave_alias_guid_event(ibdev,
3250								       p, i,
3251								       1);
3252			}
3253		}
3254		return;
3255
3256	case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
3257		if (mlx4_is_master(dev)) {
3258			int i;
3259
3260			for (i = 1; i <= ibdev->num_ports; i++) {
3261				if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
3262					== IB_LINK_LAYER_INFINIBAND)
3263					mlx4_ib_slave_alias_guid_event(ibdev,
3264								       p, i,
3265								       0);
3266			}
3267		}
3268		/* here, p is the slave id */
3269		do_slave_init(ibdev, p, 0);
3270		return;
3271
3272	default:
3273		return;
3274	}
3275
3276	ibev.device	      = ibdev_ptr;
3277	ibev.element.port_num = mlx4_is_bonded(ibdev->dev) ? 1 : (u8)p;
3278
3279	ib_dispatch_event(&ibev);
3280}
3281
3282static struct mlx4_interface mlx4_ib_interface = {
3283	.add		= mlx4_ib_add,
3284	.remove		= mlx4_ib_remove,
3285	.event		= mlx4_ib_event,
3286	.protocol	= MLX4_PROT_IB_IPV6,
3287	.flags		= MLX4_INTFF_BONDING
3288};
3289
3290static int __init mlx4_ib_init(void)
3291{
3292	int err;
3293
3294	wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM);
3295	if (!wq)
3296		return -ENOMEM;
3297
3298	err = mlx4_ib_mcg_init();
3299	if (err)
3300		goto clean_wq;
3301
3302	err = mlx4_register_interface(&mlx4_ib_interface);
3303	if (err)
3304		goto clean_mcg;
3305
3306	return 0;
3307
3308clean_mcg:
3309	mlx4_ib_mcg_destroy();
3310
3311clean_wq:
3312	destroy_workqueue(wq);
3313	return err;
3314}
3315
3316static void __exit mlx4_ib_cleanup(void)
3317{
3318	mlx4_unregister_interface(&mlx4_ib_interface);
3319	mlx4_ib_mcg_destroy();
3320	destroy_workqueue(wq);
3321}
3322
3323module_init_order(mlx4_ib_init, SI_ORDER_THIRD);
3324module_exit(mlx4_ib_cleanup);
3325
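/*
 * FreeBSD module glue: a no-op module event handler plus the module
 * declaration and its dependencies on mlx4, ibcore and the linuxkpi
 * compatibility layer.
 */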
3326static int
3327mlx4ib_evhand(module_t mod, int event, void *arg)
3328{
3329	return (0);
3330}
3331
3332static moduledata_t mlx4ib_mod = {
3333	.name = "mlx4ib",
3334	.evhand = mlx4ib_evhand,
3335};
3336
3337DECLARE_MODULE(mlx4ib, mlx4ib_mod, SI_SUB_LAST, SI_ORDER_ANY);
3338MODULE_DEPEND(mlx4ib, mlx4, 1, 1, 1);
3339MODULE_DEPEND(mlx4ib, ibcore, 1, 1, 1);
3340MODULE_DEPEND(mlx4ib, linuxkpi, 1, 1, 1);
3341