/*
 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32255932Salfred
33255932Salfred#include <rdma/ib_mad.h>
34255932Salfred#include <rdma/ib_smi.h>
35255932Salfred#include <rdma/ib_cache.h>
36255932Salfred#include <rdma/ib_sa.h>
37255932Salfred
38306486Shselasky#include <dev/mlx4/cmd.h>
39278886Shselasky#include <linux/rbtree.h>
40255932Salfred#include <linux/delay.h>
41255932Salfred
42255932Salfred#include "mlx4_ib.h"
43255932Salfred
#define MAX_VFS		80	/* max functions (PF + VFs) tracked per group */
#define MAX_PEND_REQS_PER_FUNC 4	/* per-function cap on queued requests */
#define MAD_TIMEOUT_MS	2000	/* how long to wait for an SA response */

/* Logging helpers; the *_group variants prepend the group's MGID string
 * and port number to the message. */
#define mcg_warn(fmt, arg...)	pr_warn("MCG WARNING: " fmt, ##arg)
#define mcg_error(fmt, arg...)	pr_err(fmt, ##arg)
#define mcg_warn_group(group, format, arg...) \
	pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
	(group)->name, group->demux->port, ## arg)

#define mcg_debug_group(group, format, arg...) \
	pr_debug("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
		 (group)->name, (group)->demux->port, ## arg)

#define mcg_error_group(group, format, arg...) \
	pr_err("  %16s: " format, (group)->name, ## arg)
60255932Salfred
61278886Shselasky
/* The all-zero MGID; groups compared equal to this have not yet been
 * assigned a real MGID by the SA. */
static union ib_gid mgid0;

/* Workqueue for deferred cleanup; queued by code outside this chunk. */
static struct workqueue_struct *clean_wq;

/* Membership status of a single function (slave) within a group. */
enum mcast_state {
	MCAST_NOT_MEMBER = 0,
	MCAST_MEMBER,
};

/* Group state machine with respect to the SA (wire) side. */
enum mcast_group_state {
	MCAST_IDLE,		/* no request outstanding on the wire */
	MCAST_JOIN_SENT,	/* join sent to SA, awaiting response */
	MCAST_LEAVE_SENT,	/* leave sent to SA, awaiting response */
	MCAST_RESP_READY	/* SA response stored in response_sa_mad */
};
77255932Salfred
/* Per-function (slave) membership bookkeeping within one multicast group. */
struct mcast_member {
	enum mcast_state state;			/* MCAST_MEMBER once any join-state bit is set */
	uint8_t			join_state;	/* join-state bits this function holds (low nibble) */
	int			num_pend_reqs;	/* queued requests, capped by MAX_PEND_REQS_PER_FUNC */
	struct list_head	pending;	/* this function's pending requests (func_list links) */
};
84255932Salfred
/* SA MCMemberRecord as carried in the MAD data area.  This is wire
 * format: field order and packing must not change. */
struct ib_sa_mcmember_data {
	union ib_gid	mgid;
	union ib_gid	port_gid;
	__be32		qkey;
	__be16		mlid;
	u8		mtusel_mtu;		/* MTU selector (high 2 bits) | MTU */
	u8		tclass;
	__be16		pkey;
	u8		ratesel_rate;		/* rate selector | rate */
	u8		lifetmsel_lifetm;	/* lifetime selector | lifetime */
	__be32		sl_flowlabel_hoplimit;	/* SL:4 | FlowLabel:20 | HopLimit:8 */
	u8		scope_join_state;	/* scope (high nibble) | join state (low nibble) */
	u8		proxy_join;
	u8		reserved[2];
} __packed __aligned(4);
100255932Salfred
/* State of one multicast group (one MGID) on one port, shared by all
 * functions.  Protected by 'lock' unless noted otherwise. */
struct mcast_group {
	struct ib_sa_mcmember_data rec;	/* current record as last seen from the SA */
	struct rb_node		node;	/* entry in demux mcg_table, keyed by MGID */
	struct list_head	mgid0_list; /* link for groups awaiting a real MGID */
	struct mlx4_ib_demux_ctx *demux;
	struct mcast_member	func[MAX_VFS]; /* per-function membership state */
	struct mutex		lock;
	struct work_struct	work;	/* runs mlx4_ib_mcg_work_handler */
	struct list_head	pending_list; /* FIFO of queued mcast_req's */
	int			members[3]; /* member count per join-state bit */
	enum mcast_group_state	state;
	enum mcast_group_state	prev_state; /* restored when a wire send fails or times out */
	struct ib_sa_mad	response_sa_mad; /* SA response, valid in MCAST_RESP_READY */
	__be64			last_req_tid; /* TID of the request we await a response for */

	char			name[33]; /* MGID string */
	struct device_attribute	dentry;

	/* refcount is the reference count for the following:
	   1. Each queued request
	   2. Each invocation of the worker thread
	   3. Membership of the port at the SA
	*/
	atomic_t		refcount;

	/* delayed work to clean pending SM request */
	struct delayed_work	timeout_work;
	struct list_head	cleanup_list;
};
130255932Salfred
/* One queued join/leave request originating from a function (slave). */
struct mcast_req {
	int			func;		/* requesting function number */
	struct ib_sa_mad	sa_mad;		/* copy of the request MAD */
	struct list_head	group_list;	/* link on group->pending_list */
	struct list_head	func_list;	/* link on func[...].pending */
	struct mcast_group	*group;
	int			clean;		/* internal cleanup leave: drop all
						 * join states, send no reply */
};
139255932Salfred
140255932Salfred
/* Drop a refcount that is never expected to reach zero here; warn if it
 * does.  NOTE: expands mcg_warn_group(group, ...), so it relies on a
 * variable named 'group' being in scope at the call site. */
#define safe_atomic_dec(ref) \
	do {\
		if (atomic_dec_and_test(ref)) \
			mcg_warn_group(group, "did not expect to reach zero\n"); \
	} while (0)
146255932Salfred
147255932Salfredstatic const char *get_state_string(enum mcast_group_state state)
148255932Salfred{
149255932Salfred	switch (state) {
150255932Salfred	case MCAST_IDLE:
151255932Salfred		return "MCAST_IDLE";
152255932Salfred	case MCAST_JOIN_SENT:
153255932Salfred		return "MCAST_JOIN_SENT";
154255932Salfred	case MCAST_LEAVE_SENT:
155255932Salfred		return "MCAST_LEAVE_SENT";
156255932Salfred	case MCAST_RESP_READY:
157255932Salfred		return "MCAST_RESP_READY";
158255932Salfred	}
159255932Salfred	return "Invalid State";
160255932Salfred}
161255932Salfred
162255932Salfredstatic struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
163255932Salfred				      union ib_gid *mgid)
164255932Salfred{
165255932Salfred	struct rb_node *node = ctx->mcg_table.rb_node;
166255932Salfred	struct mcast_group *group;
167255932Salfred	int ret;
168255932Salfred
169255932Salfred	while (node) {
170255932Salfred		group = rb_entry(node, struct mcast_group, node);
171255932Salfred		ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
172255932Salfred		if (!ret)
173255932Salfred			return group;
174255932Salfred
175255932Salfred		if (ret < 0)
176255932Salfred			node = node->rb_left;
177255932Salfred		else
178255932Salfred			node = node->rb_right;
179255932Salfred	}
180255932Salfred	return NULL;
181255932Salfred}
182255932Salfred
183255932Salfredstatic struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
184255932Salfred					struct mcast_group *group)
185255932Salfred{
186255932Salfred	struct rb_node **link = &ctx->mcg_table.rb_node;
187255932Salfred	struct rb_node *parent = NULL;
188255932Salfred	struct mcast_group *cur_group;
189255932Salfred	int ret;
190255932Salfred
191255932Salfred	while (*link) {
192255932Salfred		parent = *link;
193255932Salfred		cur_group = rb_entry(parent, struct mcast_group, node);
194255932Salfred
195255932Salfred		ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
196255932Salfred			     sizeof group->rec.mgid);
197255932Salfred		if (ret < 0)
198255932Salfred			link = &(*link)->rb_left;
199255932Salfred		else if (ret > 0)
200255932Salfred			link = &(*link)->rb_right;
201255932Salfred		else
202255932Salfred			return cur_group;
203255932Salfred	}
204255932Salfred	rb_link_node(&group->node, parent, link);
205255932Salfred	rb_insert_color(&group->node, &ctx->mcg_table);
206255932Salfred	return NULL;
207255932Salfred}
208255932Salfred
209255932Salfredstatic int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
210255932Salfred{
211255932Salfred	struct mlx4_ib_dev *dev = ctx->dev;
212255932Salfred	struct ib_ah_attr	ah_attr;
213331769Shselasky	unsigned long flags;
214255932Salfred
215331769Shselasky	spin_lock_irqsave(&dev->sm_lock, flags);
216255932Salfred	if (!dev->sm_ah[ctx->port - 1]) {
217255932Salfred		/* port is not yet Active, sm_ah not ready */
218331769Shselasky		spin_unlock_irqrestore(&dev->sm_lock, flags);
219255932Salfred		return -EAGAIN;
220255932Salfred	}
221255932Salfred	mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
222331769Shselasky	spin_unlock_irqrestore(&dev->sm_lock, flags);
223331769Shselasky	return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
224331769Shselasky				    ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
225331769Shselasky				    &ah_attr, NULL, 0xffff, mad);
226255932Salfred}
227255932Salfred
228255932Salfredstatic int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
229255932Salfred			     struct ib_mad *mad)
230255932Salfred{
231255932Salfred	struct mlx4_ib_dev *dev = ctx->dev;
232255932Salfred	struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
233255932Salfred	struct ib_wc wc;
234255932Salfred	struct ib_ah_attr ah_attr;
235255932Salfred
236255932Salfred	/* Our agent might not yet be registered when mads start to arrive */
237255932Salfred	if (!agent)
238255932Salfred		return -EAGAIN;
239255932Salfred
240255932Salfred	ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
241255932Salfred
242255932Salfred	if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
243255932Salfred		return -EINVAL;
244255932Salfred	wc.sl = 0;
245255932Salfred	wc.dlid_path_bits = 0;
246255932Salfred	wc.port_num = ctx->port;
247255932Salfred	wc.slid = ah_attr.dlid;  /* opensm lid */
248255932Salfred	wc.src_qp = 1;
249255932Salfred	return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
250255932Salfred}
251255932Salfred
252255932Salfredstatic int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
253255932Salfred{
254255932Salfred	struct ib_sa_mad mad;
255255932Salfred	struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
256255932Salfred	int ret;
257255932Salfred
258255932Salfred	/* we rely on a mad request as arrived from a VF */
259255932Salfred	memcpy(&mad, sa_mad, sizeof mad);
260255932Salfred
261255932Salfred	/* fix port GID to be the real one (slave 0) */
262255932Salfred	sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
263255932Salfred
264255932Salfred	/* assign our own TID */
265255932Salfred	mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
266255932Salfred	group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
267255932Salfred
268255932Salfred	ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
269255932Salfred	/* set timeout handler */
270255932Salfred	if (!ret) {
271255932Salfred		/* calls mlx4_ib_mcg_timeout_handler */
272255932Salfred		queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
273255932Salfred				msecs_to_jiffies(MAD_TIMEOUT_MS));
274255932Salfred	}
275255932Salfred
276255932Salfred	return ret;
277255932Salfred}
278255932Salfred
279255932Salfredstatic int send_leave_to_wire(struct mcast_group *group, u8 join_state)
280255932Salfred{
281255932Salfred	struct ib_sa_mad mad;
282255932Salfred	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
283255932Salfred	int ret;
284255932Salfred
285255932Salfred	memset(&mad, 0, sizeof mad);
286255932Salfred	mad.mad_hdr.base_version = 1;
287255932Salfred	mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
288255932Salfred	mad.mad_hdr.class_version = 2;
289255932Salfred	mad.mad_hdr.method = IB_SA_METHOD_DELETE;
290255932Salfred	mad.mad_hdr.status = cpu_to_be16(0);
291255932Salfred	mad.mad_hdr.class_specific = cpu_to_be16(0);
292255932Salfred	mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
293255932Salfred	group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
294255932Salfred	mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
295255932Salfred	mad.mad_hdr.attr_mod = cpu_to_be32(0);
296255932Salfred	mad.sa_hdr.sm_key = 0x0;
297255932Salfred	mad.sa_hdr.attr_offset = cpu_to_be16(7);
298255932Salfred	mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
299255932Salfred		IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
300255932Salfred
301255932Salfred	*sa_data = group->rec;
302255932Salfred	sa_data->scope_join_state = join_state;
303255932Salfred
304255932Salfred	ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
305255932Salfred	if (ret)
306255932Salfred		group->state = MCAST_IDLE;
307255932Salfred
308255932Salfred	/* set timeout handler */
309255932Salfred	if (!ret) {
310255932Salfred		/* calls mlx4_ib_mcg_timeout_handler */
311255932Salfred		queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
312255932Salfred				msecs_to_jiffies(MAD_TIMEOUT_MS));
313255932Salfred	}
314255932Salfred
315255932Salfred	return ret;
316255932Salfred}
317255932Salfred
318255932Salfredstatic int send_reply_to_slave(int slave, struct mcast_group *group,
319255932Salfred		struct ib_sa_mad *req_sa_mad, u16 status)
320255932Salfred{
321255932Salfred	struct ib_sa_mad mad;
322255932Salfred	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
323255932Salfred	struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
324255932Salfred	int ret;
325255932Salfred
326255932Salfred	memset(&mad, 0, sizeof mad);
327255932Salfred	mad.mad_hdr.base_version = 1;
328255932Salfred	mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
329255932Salfred	mad.mad_hdr.class_version = 2;
330255932Salfred	mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
331255932Salfred	mad.mad_hdr.status = cpu_to_be16(status);
332255932Salfred	mad.mad_hdr.class_specific = cpu_to_be16(0);
333255932Salfred	mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
334255932Salfred	*(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
335255932Salfred	mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
336255932Salfred	mad.mad_hdr.attr_mod = cpu_to_be32(0);
337255932Salfred	mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
338255932Salfred	mad.sa_hdr.attr_offset = cpu_to_be16(7);
339255932Salfred	mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
340255932Salfred
341255932Salfred	*sa_data = group->rec;
342255932Salfred
343255932Salfred	/* reconstruct VF's requested join_state and port_gid */
344255932Salfred	sa_data->scope_join_state &= 0xf0;
345255932Salfred	sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
346255932Salfred	memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
347255932Salfred
348255932Salfred	ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
349255932Salfred	return ret;
350255932Salfred}
351255932Salfred
352255932Salfredstatic int check_selector(ib_sa_comp_mask comp_mask,
353255932Salfred			  ib_sa_comp_mask selector_mask,
354255932Salfred			  ib_sa_comp_mask value_mask,
355255932Salfred			  u8 src_value, u8 dst_value)
356255932Salfred{
357255932Salfred	int err;
358255932Salfred	u8 selector = dst_value >> 6;
359255932Salfred	dst_value &= 0x3f;
360255932Salfred	src_value &= 0x3f;
361255932Salfred
362255932Salfred	if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
363255932Salfred		return 0;
364255932Salfred
365255932Salfred	switch (selector) {
366255932Salfred	case IB_SA_GT:
367255932Salfred		err = (src_value <= dst_value);
368255932Salfred		break;
369255932Salfred	case IB_SA_LT:
370255932Salfred		err = (src_value >= dst_value);
371255932Salfred		break;
372255932Salfred	case IB_SA_EQ:
373255932Salfred		err = (src_value != dst_value);
374255932Salfred		break;
375255932Salfred	default:
376255932Salfred		err = 0;
377255932Salfred		break;
378255932Salfred	}
379255932Salfred
380255932Salfred	return err;
381255932Salfred}
382255932Salfred
/* Compare the fields a requester specified (per comp_mask) against the
 * group's record.  Returns 0 on match, MAD_STATUS_REQ_INVALID on any
 * mismatch. */
static u16 cmp_rec(struct ib_sa_mcmember_data *src,
		   struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
{
	/* src is group record, dst is request record */
	/* MGID must already match */
	/* Port_GID we always replace to our Port_GID, so it is a match */

#define MAD_STATUS_REQ_INVALID 0x0200
	/* exact-match fields */
	if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
		return MAD_STATUS_REQ_INVALID;
	/* selector-qualified fields (GT/LT/EQ in the top two bits) */
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
				 IB_SA_MCMEMBER_REC_MTU,
				 src->mtusel_mtu, dst->mtusel_mtu))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
	    src->tclass != dst->tclass)
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
		return MAD_STATUS_REQ_INVALID;
	if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
				 IB_SA_MCMEMBER_REC_RATE,
				 src->ratesel_rate, dst->ratesel_rate))
		return MAD_STATUS_REQ_INVALID;
	if (check_selector(comp_mask,
				 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
				 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
				 src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
		return MAD_STATUS_REQ_INVALID;
	/* sub-fields packed into sl_flowlabel_hoplimit: SL | FlowLabel | HopLimit */
	if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
			(be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
			(be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
			(be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
			(be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
		return MAD_STATUS_REQ_INVALID;
	if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
			(be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
			(be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
		return MAD_STATUS_REQ_INVALID;
	/* only the scope nibble is compared here */
	if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
			(src->scope_join_state & 0xf0) !=
			(dst->scope_join_state & 0xf0))
		return MAD_STATUS_REQ_INVALID;

	/* join_state checked separately, proxy_join ignored */

	return 0;
}
434255932Salfred
/* release group, return 1 if this was last release and group is destroyed
 * timeout work is canceled sync */
static int release_group(struct mcast_group *group, int from_timeout_handler)
{
	struct mlx4_ib_demux_ctx *ctx = group->demux;
	int nzgroup;

	mutex_lock(&ctx->mcg_table_lock);
	mutex_lock(&group->lock);
	if (atomic_dec_and_test(&group->refcount)) {
		if (!from_timeout_handler) {
			/* If the timeout work is pending but cannot be
			 * canceled, the handler is about to run: give our
			 * reference back and let the handler do the release. */
			if (group->state != MCAST_IDLE &&
			    !cancel_delayed_work(&group->timeout_work)) {
				atomic_inc(&group->refcount);
				mutex_unlock(&group->lock);
				mutex_unlock(&ctx->mcg_table_lock);
				return 0;
			}
		}

		/* zero-MGID groups were never added to the rb-tree or sysfs,
		 * so only remove real-MGID groups from those structures */
		nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
		if (nzgroup)
			del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
		if (!list_empty(&group->pending_list))
			mcg_warn_group(group, "releasing a group with non empty pending list\n");
		if (nzgroup)
			rb_erase(&group->node, &ctx->mcg_table);
		list_del_init(&group->mgid0_list);
		mutex_unlock(&group->lock);
		mutex_unlock(&ctx->mcg_table_lock);
		kfree(group);
		return 1;
	} else {
		mutex_unlock(&group->lock);
		mutex_unlock(&ctx->mcg_table_lock);
	}
	return 0;
}
473255932Salfred
474255932Salfredstatic void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
475255932Salfred{
476255932Salfred	int i;
477255932Salfred
478255932Salfred	for (i = 0; i < 3; i++, join_state >>= 1)
479255932Salfred		if (join_state & 0x1)
480255932Salfred			group->members[i] += inc;
481255932Salfred}
482255932Salfred
483255932Salfredstatic u8 get_leave_state(struct mcast_group *group)
484255932Salfred{
485255932Salfred	u8 leave_state = 0;
486255932Salfred	int i;
487255932Salfred
488255932Salfred	for (i = 0; i < 3; i++)
489255932Salfred		if (!group->members[i])
490255932Salfred			leave_state |= (1 << i);
491255932Salfred
492331769Shselasky	return leave_state & (group->rec.scope_join_state & 0xf);
493255932Salfred}
494255932Salfred
495255932Salfredstatic int join_group(struct mcast_group *group, int slave, u8 join_mask)
496255932Salfred{
497255932Salfred	int ret = 0;
498255932Salfred	u8 join_state;
499255932Salfred
500255932Salfred	/* remove bits that slave is already member of, and adjust */
501255932Salfred	join_state = join_mask & (~group->func[slave].join_state);
502255932Salfred	adjust_membership(group, join_state, 1);
503255932Salfred	group->func[slave].join_state |= join_state;
504255932Salfred	if (group->func[slave].state != MCAST_MEMBER && join_state) {
505255932Salfred		group->func[slave].state = MCAST_MEMBER;
506255932Salfred		ret = 1;
507255932Salfred	}
508255932Salfred	return ret;
509255932Salfred}
510255932Salfred
511255932Salfredstatic int leave_group(struct mcast_group *group, int slave, u8 leave_state)
512255932Salfred{
513255932Salfred	int ret = 0;
514255932Salfred
515255932Salfred	adjust_membership(group, leave_state, -1);
516255932Salfred	group->func[slave].join_state &= ~leave_state;
517255932Salfred	if (!group->func[slave].join_state) {
518255932Salfred		group->func[slave].state = MCAST_NOT_MEMBER;
519255932Salfred		ret = 1;
520255932Salfred	}
521255932Salfred	return ret;
522255932Salfred}
523255932Salfred
524255932Salfredstatic int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
525255932Salfred{
526255932Salfred	if (group->func[slave].state != MCAST_MEMBER)
527255932Salfred		return MAD_STATUS_REQ_INVALID;
528255932Salfred
529255932Salfred	/* make sure we're not deleting unset bits */
530255932Salfred	if (~group->func[slave].join_state & leave_mask)
531255932Salfred		return MAD_STATUS_REQ_INVALID;
532255932Salfred
533255932Salfred	if (!leave_mask)
534255932Salfred		return MAD_STATUS_REQ_INVALID;
535255932Salfred
536255932Salfred	return 0;
537255932Salfred}
538255932Salfred
/* Delayed-work handler: the SA failed to answer a join or leave within
 * MAD_TIMEOUT_MS.  Drop the timed-out request, roll the group back to
 * MCAST_IDLE, and kick the work handler to process any remaining
 * requests. */
static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
{
	struct delayed_work *delay = to_delayed_work(work);
	struct mcast_group *group;
	struct mcast_req *req = NULL;

	group = container_of(delay, typeof(*group), timeout_work);

	mutex_lock(&group->lock);
	if (group->state == MCAST_JOIN_SENT) {
		if (!list_empty(&group->pending_list)) {
			/* drop the join request that timed out */
			req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
			list_del(&req->group_list);
			list_del(&req->func_list);
			--group->func[req->func].num_pend_reqs;
			mutex_unlock(&group->lock);
			kfree(req);
			/* zero-MGID groups are not refcounted via release_group;
			 * they are simply freed */
			if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
				if (release_group(group, 1))
					return;
			} else {
				kfree(group);
				return;
			}
			mutex_lock(&group->lock);
		} else
			mcg_warn_group(group, "DRIVER BUG\n");
	} else if (group->state == MCAST_LEAVE_SENT) {
		/* the leave was never acknowledged: clear the join-state
		 * nibble ourselves before going idle */
		if (group->rec.scope_join_state & 0xf)
			group->rec.scope_join_state &= 0xf0;
		group->state = MCAST_IDLE;
		mutex_unlock(&group->lock);
		if (release_group(group, 1))
			return;
		mutex_lock(&group->lock);
	} else
		mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
	group->state = MCAST_IDLE;
	/* re-run the work handler for whatever is still pending */
	atomic_inc(&group->refcount);
	if (!queue_work(group->demux->mcg_wq, &group->work))
		safe_atomic_dec(&group->refcount);

	mutex_unlock(&group->lock);
}
583255932Salfred
584255932Salfredstatic int handle_leave_req(struct mcast_group *group, u8 leave_mask,
585255932Salfred			    struct mcast_req *req)
586255932Salfred{
587255932Salfred	u16 status;
588255932Salfred
589255932Salfred	if (req->clean)
590255932Salfred		leave_mask = group->func[req->func].join_state;
591255932Salfred
592255932Salfred	status = check_leave(group, req->func, leave_mask);
593255932Salfred	if (!status)
594255932Salfred		leave_group(group, req->func, leave_mask);
595255932Salfred
596255932Salfred	if (!req->clean)
597255932Salfred		send_reply_to_slave(req->func, group, &req->sa_mad, status);
598255932Salfred	--group->func[req->func].num_pend_reqs;
599255932Salfred	list_del(&req->group_list);
600255932Salfred	list_del(&req->func_list);
601255932Salfred	kfree(req);
602255932Salfred	return 1;
603255932Salfred}
604255932Salfred
/* Process one queued join request.  If the port already holds all the
 * requested join-state bits, answer the VF locally; otherwise forward the
 * join to the SA and leave the request queued until the response arrives.
 * Returns the number of group references the caller must drop (1 when the
 * request was consumed here, 0 when it stays pending on the wire).
 * Caller holds group->lock. */
static int handle_join_req(struct mcast_group *group, u8 join_mask,
			   struct mcast_req *req)
{
	u8 group_join_state = group->rec.scope_join_state & 0xf;
	int ref = 0;
	u16 status;
	struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;

	if (join_mask == (group_join_state & join_mask)) {
		/* port's membership need not change */
		status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
		if (!status)
			join_group(group, req->func, join_mask);

		/* answer the VF immediately and consume the request */
		--group->func[req->func].num_pend_reqs;
		send_reply_to_slave(req->func, group, &req->sa_mad, status);
		list_del(&req->group_list);
		list_del(&req->func_list);
		kfree(req);
		++ref;
	} else {
		/* port's membership needs to be updated */
		group->prev_state = group->state;
		if (send_join_to_wire(group, &req->sa_mad)) {
			/* send failed: drop the request, restore the state */
			--group->func[req->func].num_pend_reqs;
			list_del(&req->group_list);
			list_del(&req->func_list);
			kfree(req);
			ref = 1;
			group->state = group->prev_state;
		} else
			group->state = MCAST_JOIN_SENT;
	}

	return ref;
}
641255932Salfred
/* Main per-group state machine, run from the demux workqueue.  Consumes
 * a pending SA response (if any), then drains the pending request list
 * while the group is idle, and finally sends any needed leave to the SA.
 * 'rc' accumulates how many group references to drop before returning. */
static void mlx4_ib_mcg_work_handler(struct work_struct *work)
{
	struct mcast_group *group;
	struct mcast_req *req = NULL;
	struct ib_sa_mcmember_data *sa_data;
	u8 req_join_state;
	int rc = 1; /* release_count - this is for the scheduled work */
	u16 status;
	u8 method;

	group = container_of(work, typeof(*group), work);

	mutex_lock(&group->lock);

	/* First, let's see if a response from SM is waiting regarding this group.
	 * If so, we need to update the group's REC. If this is a bad response, we
	 * may need to send a bad response to a VF waiting for it. If VF is waiting
	 * and this is a good response, the VF will be answered later in this func. */
	if (group->state == MCAST_RESP_READY) {
		/* cancels mlx4_ib_mcg_timeout_handler */
		cancel_delayed_work(&group->timeout_work);
		status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
		method = group->response_sa_mad.mad_hdr.method;
		if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
			/* stale response (e.g. for a request that already
			 * timed out): ignore it and resume where we were */
			mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
				(long long)be64_to_cpu(
				    group->response_sa_mad.mad_hdr.tid),
				(long long)be64_to_cpu(group->last_req_tid));
			group->state = group->prev_state;
			goto process_requests;
		}
		if (status) {
			/* SA rejected the request: fail the waiting VF (join)
			 * or just drop the reference (leave while flushing) */
			if (!list_empty(&group->pending_list))
				req = list_first_entry(&group->pending_list,
						struct mcast_req, group_list);
			if (method == IB_MGMT_METHOD_GET_RESP) {
					if (req) {
						send_reply_to_slave(req->func, group, &req->sa_mad, status);
						--group->func[req->func].num_pend_reqs;
						list_del(&req->group_list);
						list_del(&req->func_list);
						kfree(req);
						++rc;
					} else
						mcg_warn_group(group, "no request for failed join\n");
			} else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
				++rc;
		} else {
			u8 resp_join_state;
			u8 cur_join_state;

			resp_join_state = ((struct ib_sa_mcmember_data *)
						group->response_sa_mad.data)->scope_join_state & 0xf;
			cur_join_state = group->rec.scope_join_state & 0xf;

			if (method == IB_MGMT_METHOD_GET_RESP) {
				/* successful join */
				if (!cur_join_state && resp_join_state)
					--rc;	/* port joined at SA: keep a reference for the membership */
			} else if (!resp_join_state)
					++rc;	/* port fully left at SA: drop the membership reference */
			memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
		}
		group->state = MCAST_IDLE;
	}

process_requests:
	/* We should now go over pending join/leave requests, as long as we are idle. */
	while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
		req = list_first_entry(&group->pending_list, struct mcast_req,
				       group_list);
		sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
		req_join_state = sa_data->scope_join_state & 0xf;

		/* For a leave request, we will immediately answer the VF, and
		 * update our internal counters. The actual leave will be sent
		 * to SM later, if at all needed. We dequeue the request now. */
		if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
			rc += handle_leave_req(group, req_join_state, req);
		else
			rc += handle_join_req(group, req_join_state, req);
	}

	/* Handle leaves */
	if (group->state == MCAST_IDLE) {
		req_join_state = get_leave_state(group);
		if (req_join_state) {
			group->rec.scope_join_state &= ~req_join_state;
			group->prev_state = group->state;
			if (send_leave_to_wire(group, req_join_state)) {
				group->state = group->prev_state;
				++rc;
			} else
				group->state = MCAST_LEAVE_SENT;
		}
	}

	/* handle_join_req may have gone back to MCAST_IDLE with new requests */
	if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
		goto process_requests;
	mutex_unlock(&group->lock);

	while (rc--)
		release_group(group, 0);
}
746255932Salfred
/*
 * Find the group on the port's MGID0 list whose last request TID matches
 * @tid, and act on the SM's reply:
 *  - If the SM assigned a real MGID (@new_mgid != mgid0), rename the group,
 *    move it from the mgid0 list into the per-port rb-tree, publish its
 *    sysfs attribute, and return it with an extra reference held.
 *  - If the reply still carries MGID0 (join was rejected), tear the group
 *    down: drop all of its pending requests and free it.
 * Returns the relocated group (with a reference taken) or NULL in every
 * other case, including when no group matches @tid.
 */
static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
						       __be64 tid,
						       union ib_gid *new_mgid)
{
	struct mcast_group *group = NULL, *cur_group, *n;
	struct mcast_req *req;

	mutex_lock(&ctx->mcg_table_lock);
	/* _safe iteration: both branches below unlink group from mgid0_list */
	list_for_each_entry_safe(group, n, &ctx->mcg_mgid0_list, mgid0_list) {
		mutex_lock(&group->lock);
		if (group->last_req_tid == tid) {
			if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
				/* SM gave us a real MGID: adopt it as the group's identity */
				group->rec.mgid = *new_mgid;
				sprintf(group->name, "%016llx%016llx",
						(long long)be64_to_cpu(group->rec.mgid.global.subnet_prefix),
						(long long)be64_to_cpu(group->rec.mgid.global.interface_id));
				list_del_init(&group->mgid0_list);
				cur_group = mcast_insert(ctx, group);
				if (cur_group) {
					/* A race between our code and SM. Silently cleaning the new one */
					req = list_first_entry(&group->pending_list,
							       struct mcast_req, group_list);
					--group->func[req->func].num_pend_reqs;
					list_del(&req->group_list);
					list_del(&req->func_list);
					kfree(req);
					mutex_unlock(&group->lock);
					mutex_unlock(&ctx->mcg_table_lock);
					/* drop the reference the queued request held */
					release_group(group, 0);
					return NULL;
				}

				/* reference for the caller */
				atomic_inc(&group->refcount);
				add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
				mutex_unlock(&group->lock);
				mutex_unlock(&ctx->mcg_table_lock);
				return group;
			} else {
				/* still MGID0: the join failed, discard the group */
				struct mcast_req *tmp1, *tmp2;

				list_del(&group->mgid0_list);
				if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
					cancel_delayed_work_sync(&group->timeout_work);

				list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
					list_del(&tmp1->group_list);
					kfree(tmp1);
				}
				mutex_unlock(&group->lock);
				mutex_unlock(&ctx->mcg_table_lock);
				kfree(group);
				return NULL;
			}
		}
		mutex_unlock(&group->lock);
	}
	mutex_unlock(&ctx->mcg_table_lock);

	return NULL;
}
807255932Salfred
808255932Salfredstatic ssize_t sysfs_show_group(struct device *dev,
809255932Salfred		struct device_attribute *attr, char *buf);
810255932Salfred
/*
 * Look up the multicast group for @mgid on this port, optionally creating
 * it when @create is non-zero.  On success the group is returned with its
 * refcount incremented; the caller must balance with release_group().
 * Returns ERR_PTR(-ENOENT) if not found and !create, ERR_PTR(-ENOMEM) on
 * allocation failure, ERR_PTR(-EINVAL) on an insert race.
 *
 * NOTE(review): both call sites take ctx->mcg_table_lock around this call,
 * so mcast_find()/mcast_insert() appear to rely on the caller holding that
 * lock -- confirm before adding new callers.
 *
 * Groups created for the all-zero MGID (a join where the SM assigns the
 * MGID) are parked on ctx->mcg_mgid0_list instead of the rb-tree; they are
 * relocated later by search_relocate_mgid0_group().
 */
static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
					 union ib_gid *mgid, int create,
					 gfp_t gfp_mask)
{
	struct mcast_group *group, *cur_group;
	int is_mgid0;
	int i;

	is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
	if (!is_mgid0) {
		group = mcast_find(ctx, mgid);
		if (group)
			goto found;
	}

	if (!create)
		return ERR_PTR(-ENOENT);

	group = kzalloc(sizeof *group, gfp_mask);
	if (!group)
		return ERR_PTR(-ENOMEM);

	group->demux = ctx;
	group->rec.mgid = *mgid;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->mgid0_list);
	for (i = 0; i < MAX_VFS; ++i)
		INIT_LIST_HEAD(&group->func[i].pending);
	INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
	INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
	mutex_init(&group->lock);
	/* sysfs attribute name: the 128-bit MGID rendered as 32 hex digits */
	sprintf(group->name, "%016llx%016llx",
			(long long)be64_to_cpu(
			    group->rec.mgid.global.subnet_prefix),
			(long long)be64_to_cpu(
			    group->rec.mgid.global.interface_id));
	sysfs_attr_init(&group->dentry.attr);
	group->dentry.show = sysfs_show_group;
	group->dentry.store = NULL;
	group->dentry.attr.name = group->name;
	group->dentry.attr.mode = 0400;
	group->state = MCAST_IDLE;

	if (is_mgid0) {
		/* MGID assigned by SM later -- keep off the rb-tree for now */
		list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
		goto found;
	}

	cur_group = mcast_insert(ctx, group);
	if (cur_group) {
		mcg_warn("group just showed up %s - confused\n", cur_group->name);
		kfree(group);
		return ERR_PTR(-EINVAL);
	}

	add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);

found:
	atomic_inc(&group->refcount);
	return group;
}
872255932Salfred
873255932Salfredstatic void queue_req(struct mcast_req *req)
874255932Salfred{
875255932Salfred	struct mcast_group *group = req->group;
876255932Salfred
877255932Salfred	atomic_inc(&group->refcount); /* for the request */
878255932Salfred	atomic_inc(&group->refcount); /* for scheduling the work */
879255932Salfred	list_add_tail(&req->group_list, &group->pending_list);
880255932Salfred	list_add_tail(&req->func_list, &group->func[req->func].pending);
881255932Salfred	/* calls mlx4_ib_mcg_work_handler */
882278886Shselasky	if (!queue_work(group->demux->mcg_wq, &group->work))
883296382Shselasky		safe_atomic_dec(&group->refcount);
884255932Salfred}
885255932Salfred
/*
 * Demux an MCMember SA MAD arriving from the wire (SM responses).
 * Returns 1 when the MAD was consumed here, 0 when it should be tunneled
 * through to the guest unmodified.
 */
int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
			      struct ib_sa_mad *mad)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
	struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
	struct mcast_group *group;

	switch (mad->mad_hdr.method) {
	case IB_MGMT_METHOD_GET_RESP:
	case IB_SA_METHOD_DELETE_RESP:
		mutex_lock(&ctx->mcg_table_lock);
		group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
		mutex_unlock(&ctx->mcg_table_lock);
		if (IS_ERR(group)) {
			if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
				/* Possibly a reply to a join that was sent with
				 * MGID0: match it by TID instead of MGID. */
				__be64 tid = mad->mad_hdr.tid;
				*(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
				group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
			} else
				group = NULL;
		}

		if (!group)
			return 1;

		mutex_lock(&group->lock);
		/* stash the response and let the work handler process it */
		group->response_sa_mad = *mad;
		group->prev_state = group->state;
		group->state = MCAST_RESP_READY;
		/* calls mlx4_ib_mcg_work_handler */
		atomic_inc(&group->refcount);
		if (!queue_work(ctx->mcg_wq, &group->work))
			safe_atomic_dec(&group->refcount);
		mutex_unlock(&group->lock);
		/* drop the reference taken by acquire_group()/relocate */
		release_group(group, 0);
		return 1; /* consumed */
	case IB_MGMT_METHOD_SET:
	case IB_SA_METHOD_GET_TABLE:
	case IB_SA_METHOD_GET_TABLE_RESP:
	case IB_SA_METHOD_DELETE:
		return 0; /* not consumed, pass-through to guest over tunnel */
	default:
		mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
			port, mad->mad_hdr.method);
		return 1; /* consumed */
	}
}
934255932Salfred
/*
 * Multiplex an MCMember SA MAD coming from a guest (VF @slave) toward the
 * wire.  Join (SET) and leave (DELETE) requests are queued on the group
 * and handled asynchronously; everything else passes through.
 * Returns 1 if consumed, 0 for pass-through, negative errno on failure.
 */
int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
				  int slave, struct ib_sa_mad *sa_mad)
{
	struct mlx4_ib_dev *dev = to_mdev(ibdev);
	struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
	struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
	struct mcast_group *group;
	struct mcast_req *req;
	int may_create = 0;

	if (ctx->flushing)
		return -EAGAIN;

	switch (sa_mad->mad_hdr.method) {
	case IB_MGMT_METHOD_SET:
		may_create = 1;
		/* fall through: SET and DELETE share the queueing path;
		 * only a join (SET) may create a new group */
	case IB_SA_METHOD_DELETE:
		req = kzalloc(sizeof *req, GFP_KERNEL);
		if (!req)
			return -ENOMEM;

		req->func = slave;
		req->sa_mad = *sa_mad;

		mutex_lock(&ctx->mcg_table_lock);
		group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
		mutex_unlock(&ctx->mcg_table_lock);
		if (IS_ERR(group)) {
			kfree(req);
			return PTR_ERR(group);
		}
		mutex_lock(&group->lock);
		/* throttle: cap outstanding requests per VF */
		if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
			mutex_unlock(&group->lock);
			mcg_debug_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
					port, slave, MAX_PEND_REQS_PER_FUNC);
			release_group(group, 0);
			kfree(req);
			return -ENOMEM;
		}
		++group->func[slave].num_pend_reqs;
		req->group = group;
		queue_req(req);
		mutex_unlock(&group->lock);
		release_group(group, 0);
		return 1; /* consumed */
	case IB_SA_METHOD_GET_TABLE:
	case IB_MGMT_METHOD_GET_RESP:
	case IB_SA_METHOD_GET_TABLE_RESP:
	case IB_SA_METHOD_DELETE_RESP:
		return 0; /* not consumed, pass-through */
	default:
		mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
			port, slave, sa_mad->mad_hdr.method);
		return 1; /* consumed */
	}
}
992255932Salfred
/*
 * sysfs show callback for a multicast group attribute: one line with the
 * group's join state, member counts, refcount, pending/current state, the
 * per-VF membership, and the decoded MCMemberRecord fields.
 *
 * NOTE(review): state_str/pending_str are fixed 40-byte buffers filled with
 * sprintf; sizes look sufficient for the "%s(TID=0x%llx)" forms used, but
 * verify if the state-string table ever grows.
 */
static ssize_t sysfs_show_group(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct mcast_group *group =
		container_of(attr, struct mcast_group, dentry);
	struct mcast_req *req = NULL;
	char pending_str[40];
	char state_str[40];
	ssize_t len = 0;
	int f;

	if (group->state == MCAST_IDLE)
		sprintf(state_str, "%s", get_state_string(group->state));
	else
		sprintf(state_str, "%s(TID=0x%llx)",
				get_state_string(group->state),
				(long long)be64_to_cpu(group->last_req_tid));
	if (list_empty(&group->pending_list)) {
		sprintf(pending_str, "No");
	} else {
		/* show the TID of the oldest pending request */
		req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
		sprintf(pending_str, "Yes(TID=0x%llx)",
				(long long)be64_to_cpu(
				    req->sa_mad.mad_hdr.tid));
	}
	len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s     ",
			group->rec.scope_join_state & 0xf,
			group->members[2], group->members[1], group->members[0],
			atomic_read(&group->refcount),
			pending_str,
			state_str);
	/* list each VF that is currently a member, with its join state */
	for (f = 0; f < MAX_VFS; ++f)
		if (group->func[f].state == MCAST_MEMBER)
			len += sprintf(buf + len, "%d[%1x] ",
					f, group->func[f].join_state);

	/* decoded MCMemberRecord: pkey qkey mtu-sel mtu tclass rate-sel rate
	 * sl flowlabel hoplimit proxy_join */
	len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
		"%4x %4x %2x %2x)\n",
		be16_to_cpu(group->rec.pkey),
		be32_to_cpu(group->rec.qkey),
		(group->rec.mtusel_mtu & 0xc0) >> 6,
		group->rec.mtusel_mtu & 0x3f,
		group->rec.tclass,
		(group->rec.ratesel_rate & 0xc0) >> 6,
		group->rec.ratesel_rate & 0x3f,
		(be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
		(be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
		be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
		group->rec.proxy_join);

	return len;
}
1045255932Salfred
1046255932Salfredint mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
1047255932Salfred{
1048255932Salfred	char name[20];
1049255932Salfred
1050255932Salfred	atomic_set(&ctx->tid, 0);
1051255932Salfred	sprintf(name, "mlx4_ib_mcg%d", ctx->port);
1052331769Shselasky	ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
1053255932Salfred	if (!ctx->mcg_wq)
1054255932Salfred		return -ENOMEM;
1055255932Salfred
1056255932Salfred	mutex_init(&ctx->mcg_table_lock);
1057255932Salfred	ctx->mcg_table = RB_ROOT;
1058255932Salfred	INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
1059255932Salfred	ctx->flushing = 0;
1060255932Salfred
1061255932Salfred	return 0;
1062255932Salfred}
1063255932Salfred
1064255932Salfredstatic void force_clean_group(struct mcast_group *group)
1065255932Salfred{
1066255932Salfred	struct mcast_req *req, *tmp
1067255932Salfred		;
1068255932Salfred	list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
1069255932Salfred		list_del(&req->group_list);
1070255932Salfred		kfree(req);
1071255932Salfred	}
1072255932Salfred	del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
1073255932Salfred	rb_erase(&group->node, &group->demux->mcg_table);
1074255932Salfred	kfree(group);
1075255932Salfred}
1076255932Salfred
/*
 * Core port cleanup: push leave requests for every VF, wait (bounded by
 * MAD_TIMEOUT_MS + 3s) for the group table to drain naturally, flush the
 * work queue, then force-free whatever groups remain.
 * @destroy_wq: also destroy the per-port workqueue (driver unload path).
 */
static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
{
	int i;
	struct rb_node *p;
	struct mcast_group *group;
	unsigned long end;
	int count;

	/* queue synthetic leaves / drop pending reqs for every VF */
	for (i = 0; i < MAX_VFS; ++i)
		clean_vf_mcast(ctx, i);

	/* poll until the table is empty or the deadline passes */
	end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
	do {
		count = 0;
		mutex_lock(&ctx->mcg_table_lock);
		for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
			++count;
		mutex_unlock(&ctx->mcg_table_lock);
		if (!count)
			break;

		msleep(1);
	} while (time_after(end, jiffies));

	flush_workqueue(ctx->mcg_wq);
	if (destroy_wq)
		destroy_workqueue(ctx->mcg_wq);

	/* anything still in the table did not drain in time -- force it */
	mutex_lock(&ctx->mcg_table_lock);
	while ((p = rb_first(&ctx->mcg_table)) != NULL) {
		group = rb_entry(p, struct mcast_group, node);
		if (atomic_read(&group->refcount))
			mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);

		force_clean_group(group);
	}
	mutex_unlock(&ctx->mcg_table_lock);
}
1115255932Salfred
/* Work item carrying the arguments of a deferred port cleanup. */
struct clean_work {
	struct work_struct work;	/* queued on clean_wq */
	struct mlx4_ib_demux_ctx *ctx;	/* port context to clean */
	int destroy_wq;			/* forwarded to _mlx4_ib_mcg_port_cleanup() */
};
1121255932Salfred
1122255932Salfredstatic void mcg_clean_task(struct work_struct *work)
1123255932Salfred{
1124255932Salfred	struct clean_work *cw = container_of(work, struct clean_work, work);
1125255932Salfred
1126255932Salfred	_mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
1127255932Salfred	cw->ctx->flushing = 0;
1128255932Salfred	kfree(cw);
1129255932Salfred}
1130255932Salfred
/*
 * Trigger cleanup of a port's multicast state.  With @destroy_wq set
 * (driver unload) the cleanup runs synchronously; otherwise it is deferred
 * to clean_wq so the caller is not blocked for up to MAD_TIMEOUT_MS+3s.
 *
 * NOTE(review): the flushing test-and-set below is not atomic; it appears
 * to rely on callers being serialized externally -- confirm before adding
 * concurrent callers.
 */
void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
{
	struct clean_work *work;

	if (ctx->flushing)
		return;

	ctx->flushing = 1;

	if (destroy_wq) {
		/* unload path: must complete before the module goes away */
		_mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
		ctx->flushing = 0;
		return;
	}

	work = kmalloc(sizeof *work, GFP_KERNEL);
	if (!work) {
		ctx->flushing = 0;
		mcg_warn("failed allocating work for cleanup\n");
		return;
	}

	work->ctx = ctx;
	work->destroy_wq = destroy_wq;
	INIT_WORK(&work->work, mcg_clean_task);
	queue_work(clean_wq, &work->work);
}
1158255932Salfred
1159255932Salfredstatic void build_leave_mad(struct mcast_req *req)
1160255932Salfred{
1161255932Salfred	struct ib_sa_mad *mad = &req->sa_mad;
1162255932Salfred
1163255932Salfred	mad->mad_hdr.method = IB_SA_METHOD_DELETE;
1164255932Salfred}
1165255932Salfred
1166255932Salfred
/*
 * Drop all of VF @vf's pending requests on @group, fixing up the group
 * refcount for each one removed.  If the VF's request at the head of the
 * group list is currently in flight (JOIN_SENT/LEAVE_SENT) we try to
 * cancel its timeout; if the timeout already fired (cancel failed) the
 * request must be left in place for the timeout handler to consume.
 * Caller holds group->lock.
 */
static void clear_pending_reqs(struct mcast_group *group, int vf)
{
	struct mcast_req *req, *tmp, *group_first = NULL;
	int clear;
	int pend = 0;

	if (!list_empty(&group->pending_list))
		group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);

	list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
		clear = 1;
		if (group_first == req &&
		    (group->state == MCAST_JOIN_SENT ||
		     group->state == MCAST_LEAVE_SENT)) {
			/* only safe to free if the timeout hadn't fired yet */
			clear = cancel_delayed_work(&group->timeout_work);
			pend = !clear;
			group->state = MCAST_IDLE;
		}
		if (clear) {
			--group->func[vf].num_pend_reqs;
			list_del(&req->group_list);
			list_del(&req->func_list);
			kfree(req);
			/* drop the reference queue_req() took for this request */
			atomic_dec(&group->refcount);
		}
	}

	/* sanity: with nothing left pending, both trackers must be empty */
	if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
		mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
			       list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
	}
}
1199255932Salfred
1200255932Salfredstatic int push_deleteing_req(struct mcast_group *group, int slave)
1201255932Salfred{
1202255932Salfred	struct mcast_req *req;
1203255932Salfred	struct mcast_req *pend_req;
1204255932Salfred
1205255932Salfred	if (!group->func[slave].join_state)
1206255932Salfred		return 0;
1207255932Salfred
1208255932Salfred	req = kzalloc(sizeof *req, GFP_KERNEL);
1209255932Salfred	if (!req) {
1210255932Salfred		mcg_warn_group(group, "failed allocation - may leave stall groups\n");
1211255932Salfred		return -ENOMEM;
1212255932Salfred	}
1213255932Salfred
1214255932Salfred	if (!list_empty(&group->func[slave].pending)) {
1215255932Salfred		pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
1216255932Salfred		if (pend_req->clean) {
1217255932Salfred			kfree(req);
1218255932Salfred			return 0;
1219255932Salfred		}
1220255932Salfred	}
1221255932Salfred
1222255932Salfred	req->clean = 1;
1223255932Salfred	req->func = slave;
1224255932Salfred	req->group = group;
1225255932Salfred	++group->func[slave].num_pend_reqs;
1226255932Salfred	build_leave_mad(req);
1227255932Salfred	queue_req(req);
1228255932Salfred	return 0;
1229255932Salfred}
1230255932Salfred
1231255932Salfredvoid clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
1232255932Salfred{
1233255932Salfred	struct mcast_group *group;
1234255932Salfred	struct rb_node *p;
1235255932Salfred
1236255932Salfred	mutex_lock(&ctx->mcg_table_lock);
1237255932Salfred	for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
1238255932Salfred		group = rb_entry(p, struct mcast_group, node);
1239255932Salfred		mutex_lock(&group->lock);
1240255932Salfred		if (atomic_read(&group->refcount)) {
1241255932Salfred			/* clear pending requests of this VF */
1242255932Salfred			clear_pending_reqs(group, slave);
1243255932Salfred			push_deleteing_req(group, slave);
1244255932Salfred		}
1245255932Salfred		mutex_unlock(&group->lock);
1246255932Salfred	}
1247255932Salfred	mutex_unlock(&ctx->mcg_table_lock);
1248255932Salfred}
1249255932Salfred
1250255932Salfred
1251255932Salfredint mlx4_ib_mcg_init(void)
1252255932Salfred{
1253331769Shselasky	clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM);
1254255932Salfred	if (!clean_wq)
1255255932Salfred		return -ENOMEM;
1256255932Salfred
1257255932Salfred	return 0;
1258255932Salfred}
1259255932Salfred
/* Module teardown: destroy the global cleanup workqueue created in
 * mlx4_ib_mcg_init(). */
void mlx4_ib_mcg_destroy(void)
{
	destroy_workqueue(clean_wq);
}
1264