/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

35116518Sphk#include "ipoib.h"
36116518Sphk
37116518Sphk#include <linux/delay.h>
38116518Sphk#include <linux/completion.h>
39116518Sphk
40116518Sphk#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
41116518Sphkstatic int mcast_debug_level = 1;
42116518Sphk
43116518Sphkmodule_param(mcast_debug_level, int, 0644);
44116518SphkMODULE_PARM_DESC(mcast_debug_level,
45116518Sphk		 "Enable multicast debug tracing if > 0");
46116518Sphk#endif
47116518Sphk
48116518Sphkstatic DEFINE_MUTEX(mcast_mutex);
49116518Sphk
50116518Sphkstruct ipoib_mcast_iter {
51116518Sphk	struct ipoib_dev_priv *priv;
52116518Sphk	union ib_gid       mgid;
53116518Sphk	unsigned long      created;
54116518Sphk	unsigned int       queuelen;
55116518Sphk	unsigned int       complete;
56116518Sphk	unsigned int       send_only;
57116518Sphk};
58116518Sphk
59116518Sphkstatic void ipoib_mcast_free(struct ipoib_mcast *mcast)
60116518Sphk{
61116518Sphk	struct ifnet *dev = mcast->priv->dev;
62116518Sphk	int tx_dropped = 0;
63116518Sphk
64116518Sphk	ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n",
65116518Sphk			mcast->mcmember.mgid.raw, ":");
66116518Sphk
67116518Sphk	if (mcast->ah)
68116518Sphk		ipoib_put_ah(mcast->ah);
69116518Sphk
70116518Sphk	tx_dropped = mcast->pkt_queue.ifq_len;
71116518Sphk	_IF_DRAIN(&mcast->pkt_queue);	/* XXX Locking. */
72116518Sphk
73116518Sphk	dev->if_oerrors += tx_dropped;
74116518Sphk
75116518Sphk	kfree(mcast);
76116518Sphk}
77116518Sphk
78116518Sphkstatic struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv,
79116518Sphk					     int can_sleep)
80116518Sphk{
81116518Sphk	struct ipoib_mcast *mcast;
82116518Sphk
83116518Sphk	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
84116518Sphk	if (!mcast)
85116518Sphk		return NULL;
86116518Sphk
87116518Sphk	mcast->priv = priv;
88116518Sphk	mcast->created = jiffies;
89116518Sphk	mcast->backoff = 1;
90116518Sphk
91116518Sphk	INIT_LIST_HEAD(&mcast->list);
92116518Sphk	bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue));
93125803Sphk
94116518Sphk	return mcast;
95116518Sphk}
96116518Sphk
97116518Sphkstatic struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv,
98125755Sphk    void *mgid)
99116518Sphk{
100116518Sphk	struct rb_node *n = priv->multicast_tree.rb_node;
101116518Sphk
102116518Sphk	while (n) {
103116518Sphk		struct ipoib_mcast *mcast;
104116518Sphk		int ret;
105116518Sphk
106116518Sphk		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
107116518Sphk
108116518Sphk		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
109116518Sphk			     sizeof (union ib_gid));
110116518Sphk		if (ret < 0)
111116518Sphk			n = n->rb_left;
112116518Sphk		else if (ret > 0)
113116518Sphk			n = n->rb_right;
114116518Sphk		else
115116518Sphk			return mcast;
116116518Sphk	}
117116518Sphk
118116518Sphk	return NULL;
119116518Sphk}
120116518Sphk
121116518Sphkstatic int __ipoib_mcast_add(struct ipoib_dev_priv *priv,
122116518Sphk    struct ipoib_mcast *mcast)
123116518Sphk{
124116518Sphk	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
125116518Sphk
126116518Sphk	while (*n) {
127116518Sphk		struct ipoib_mcast *tmcast;
128116518Sphk		int ret;
129116518Sphk
130116518Sphk		pn = *n;
131116518Sphk		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
132125755Sphk
133116518Sphk		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
134116518Sphk			     sizeof (union ib_gid));
135116518Sphk		if (ret < 0)
136116518Sphk			n = &pn->rb_left;
137116518Sphk		else if (ret > 0)
138116518Sphk			n = &pn->rb_right;
139116518Sphk		else
140116518Sphk			return -EEXIST;
141116518Sphk	}
142116518Sphk
143116518Sphk	rb_link_node(&mcast->rb_node, pn, n);
144116518Sphk	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
145116518Sphk
146116518Sphk	return 0;
147116518Sphk}
148116518Sphk
149116518Sphkstatic int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
150116518Sphk				   struct ib_sa_mcmember_rec *mcmember)
151116518Sphk{
152116518Sphk	struct ipoib_dev_priv *priv = mcast->priv;
153116518Sphk	struct ifnet *dev = priv->dev;
154116518Sphk	struct ipoib_ah *ah;
155116518Sphk	int ret;
156116518Sphk	int set_qkey = 0;
157116518Sphk
158116518Sphk	mcast->mcmember = *mcmember;
159116518Sphk
160116518Sphk	/* Set the cached Q_Key before we attach if it's the broadcast group */
161116518Sphk	if (!memcmp(mcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
162116518Sphk		    sizeof (union ib_gid))) {
163116518Sphk		spin_lock_irq(&priv->lock);
164116518Sphk		if (!priv->broadcast) {
165116518Sphk			spin_unlock_irq(&priv->lock);
166116518Sphk			return -EAGAIN;
167116518Sphk		}
168116518Sphk		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
169116518Sphk		spin_unlock_irq(&priv->lock);
170116518Sphk		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
171116518Sphk		set_qkey = 1;
172116518Sphk	}
173116518Sphk
174116518Sphk	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
175116518Sphk		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
176116518Sphk			ipoib_warn(priv, "multicast group %16D already attached\n",
177116518Sphk				   mcast->mcmember.mgid.raw, ":");
178116518Sphk
179116518Sphk			return 0;
180116518Sphk		}
181125755Sphk
182116518Sphk		ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid),
183116518Sphk					 &mcast->mcmember.mgid, set_qkey);
184116518Sphk		if (ret < 0) {
185116518Sphk			ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n",
186116518Sphk				   mcast->mcmember.mgid.raw, ":");
187116518Sphk
188116518Sphk			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
189116518Sphk			return ret;
190116518Sphk		}
191116518Sphk	}
192121366Sphk
193116518Sphk	{
194116518Sphk		struct ib_ah_attr av = {
195116518Sphk			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
196116518Sphk			.port_num      = priv->port,
197116518Sphk			.sl	       = mcast->mcmember.sl,
198116518Sphk			.ah_flags      = IB_AH_GRH,
199116518Sphk			.static_rate   = mcast->mcmember.rate,
200116518Sphk			.grh	       = {
201116518Sphk				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
202116518Sphk				.hop_limit     = mcast->mcmember.hop_limit,
203116518Sphk				.sgid_index    = 0,
204116518Sphk				.traffic_class = mcast->mcmember.traffic_class
205116518Sphk			}
206116518Sphk		};
207116518Sphk		av.grh.dgid = mcast->mcmember.mgid;
208116518Sphk
209116518Sphk		ah = ipoib_create_ah(priv, priv->pd, &av);
210116518Sphk		if (!ah) {
211116518Sphk			ipoib_warn(priv, "ib_address_create failed\n");
212116518Sphk		} else {
213116518Sphk			spin_lock_irq(&priv->lock);
214116518Sphk			mcast->ah = ah;
215116518Sphk			spin_unlock_irq(&priv->lock);
216116518Sphk
217116518Sphk			ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n",
218116518Sphk					mcast->mcmember.mgid.raw, ":",
219116518Sphk					mcast->ah->ah,
220116518Sphk					be16_to_cpu(mcast->mcmember.mlid),
221116518Sphk					mcast->mcmember.sl);
222116518Sphk		}
223116518Sphk	}
224116518Sphk
225116518Sphk	/* actually send any queued packets */
226116518Sphk	while (mcast->pkt_queue.ifq_len) {
227116518Sphk		struct mbuf *mb;
228116518Sphk		_IF_DEQUEUE(&mcast->pkt_queue, mb);
229116518Sphk		mb->m_pkthdr.rcvif = dev;
230116518Sphk
231116518Sphk		if (dev->if_transmit(dev, mb))
232116518Sphk			ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
233116518Sphk	}
234116518Sphk
235116518Sphk	return 0;
236116518Sphk}
237116518Sphk
238116518Sphkstatic int
239116518Sphkipoib_mcast_sendonly_join_complete(int status,
240116518Sphk				   struct ib_sa_multicast *multicast)
241116518Sphk{
242116518Sphk	struct ipoib_mcast *mcast = multicast->context;
243116518Sphk	struct ipoib_dev_priv *priv = mcast->priv;
244116518Sphk
245116518Sphk	/* We trap for port events ourselves. */
246116518Sphk	if (status == -ENETRESET)
247116518Sphk		return 0;
248116518Sphk
249116518Sphk	if (!status)
250116518Sphk		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
251116518Sphk
252116518Sphk	if (status) {
253116518Sphk		if (mcast->logcount++ < 20)
254116518Sphk			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
255116518Sphk					mcast->mcmember.mgid.raw, ":", status);
256116518Sphk
257116518Sphk		/* Flush out any queued packets */
258116518Sphk		priv->dev->if_oerrors += mcast->pkt_queue.ifq_len;
259116518Sphk		_IF_DRAIN(&mcast->pkt_queue);
260116518Sphk
261116518Sphk		/* Clear the busy flag so we try again */
262116518Sphk		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
263116518Sphk					    &mcast->flags);
264116518Sphk	}
265116518Sphk	return status;
266116518Sphk}
267116518Sphk
268116518Sphkstatic int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
269116518Sphk{
270116518Sphk	struct ipoib_dev_priv *priv = mcast->priv;
271116518Sphk	struct ib_sa_mcmember_rec rec = {
272116518Sphk#if 0				/* Some SMs don't support send-only yet */
273116518Sphk		.join_state = 4
274116518Sphk#else
275116518Sphk		.join_state = 1
276116518Sphk#endif
277116518Sphk	};
278116518Sphk	int ret = 0;
279116518Sphk
280116518Sphk	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
281116518Sphk		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
282116518Sphk		return -ENODEV;
283116518Sphk	}
284116518Sphk
285116518Sphk	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
286116518Sphk		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
287116518Sphk		return -EBUSY;
288116518Sphk	}
289116518Sphk
290116518Sphk	rec.mgid     = mcast->mcmember.mgid;
291116518Sphk	rec.port_gid = priv->local_gid;
292125803Sphk	rec.pkey     = cpu_to_be16(priv->pkey);
293125803Sphk
294125803Sphk	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
295125803Sphk					 priv->port, &rec,
296125803Sphk					 IB_SA_MCMEMBER_REC_MGID	|
297125803Sphk					 IB_SA_MCMEMBER_REC_PORT_GID	|
298116518Sphk					 IB_SA_MCMEMBER_REC_PKEY	|
299116518Sphk					 IB_SA_MCMEMBER_REC_JOIN_STATE,
300116518Sphk					 GFP_ATOMIC,
301116518Sphk					 ipoib_mcast_sendonly_join_complete,
302116518Sphk					 mcast);
303116518Sphk	if (IS_ERR(mcast->mc)) {
304116518Sphk		ret = PTR_ERR(mcast->mc);
305125755Sphk		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
306116518Sphk		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
307116518Sphk			   ret);
308116518Sphk	} else {
309116518Sphk		ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n",
310116518Sphk				mcast->mcmember.mgid.raw, ":");
311116518Sphk	}
312116518Sphk
313116518Sphk	return ret;
314116518Sphk}
315125755Sphk
316116518Sphkvoid ipoib_mcast_carrier_on_task(struct work_struct *work)
317116518Sphk{
318116518Sphk	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
319116518Sphk						   carrier_on_task);
320116518Sphk	struct ib_port_attr attr;
321116518Sphk
322116518Sphk	/*
323116518Sphk	 * Take rtnl_lock to avoid racing with ipoib_stop() and
324116518Sphk	 * turning the carrier back on while a device is being
325125755Sphk	 * removed.
326116518Sphk	 */
327116518Sphk	if (ib_query_port(priv->ca, priv->port, &attr) ||
328116518Sphk	    attr.state != IB_PORT_ACTIVE) {
329116518Sphk		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
330116518Sphk		return;
331116518Sphk	}
332116518Sphk	if_link_state_change(priv->dev, LINK_STATE_UP);
333116518Sphk}
334116518Sphk
335125755Sphkstatic int ipoib_mcast_join_complete(int status,
336116518Sphk				     struct ib_sa_multicast *multicast)
337116518Sphk{
338116518Sphk	struct ipoib_mcast *mcast = multicast->context;
339116518Sphk	struct ipoib_dev_priv *priv = mcast->priv;
340116518Sphk
341116518Sphk	ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n",
342116518Sphk			mcast->mcmember.mgid.raw, ":", status);
343116518Sphk
344116518Sphk	/* We trap for port events ourselves. */
345116518Sphk	if (status == -ENETRESET)
346116518Sphk		return 0;
347116518Sphk
348116518Sphk	if (!status)
349116518Sphk		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
350116518Sphk
351116518Sphk	if (!status) {
352116518Sphk		mcast->backoff = 1;
353116518Sphk		mutex_lock(&mcast_mutex);
354116518Sphk		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
355116518Sphk			queue_delayed_work(ipoib_workqueue,
356116518Sphk					   &priv->mcast_task, 0);
357116518Sphk		mutex_unlock(&mcast_mutex);
358116518Sphk
359116518Sphk		/*
360116518Sphk		 * Defer carrier on work to ipoib_workqueue to avoid a
361116518Sphk		 * deadlock on rtnl_lock here.
362125755Sphk		 */
363116518Sphk		if (mcast == priv->broadcast)
364116518Sphk			queue_work(ipoib_workqueue, &priv->carrier_on_task);
365116518Sphk
366116518Sphk		return 0;
367116518Sphk	}
368116518Sphk
369116518Sphk	if (mcast->logcount++ < 20) {
370116518Sphk		if (status == -ETIMEDOUT || status == -EAGAIN) {
371116518Sphk			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
372116518Sphk					mcast->mcmember.mgid.raw, ":", status);
373116518Sphk		} else {
374116518Sphk			ipoib_warn(priv, "multicast join failed for %16D, status %d\n",
375116518Sphk				   mcast->mcmember.mgid.raw, ":", status);
376116518Sphk		}
377116518Sphk	}
378116518Sphk
379116518Sphk	mcast->backoff *= 2;
380116518Sphk	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
381116518Sphk		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
382116518Sphk
383116518Sphk	/* Clear the busy flag so we try again */
384116518Sphk	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
385121475Sphk
386116518Sphk	mutex_lock(&mcast_mutex);
387116518Sphk	spin_lock_irq(&priv->lock);
388116518Sphk	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
389116518Sphk		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
390116518Sphk				   mcast->backoff * HZ);
391116518Sphk	spin_unlock_irq(&priv->lock);
392116518Sphk	mutex_unlock(&mcast_mutex);
393116518Sphk
394116518Sphk	return status;
395116518Sphk}
396116518Sphk
397116518Sphkstatic void ipoib_mcast_join(struct ipoib_dev_priv *priv,
398116518Sphk    struct ipoib_mcast *mcast, int create)
399116518Sphk{
400116518Sphk	struct ib_sa_mcmember_rec rec = {
401116518Sphk		.join_state = 1
402116518Sphk	};
403125755Sphk	ib_sa_comp_mask comp_mask;
404116518Sphk	int ret = 0;
405116518Sphk
406116518Sphk	ipoib_dbg_mcast(priv, "joining MGID %16D\n",
407116518Sphk	    mcast->mcmember.mgid.raw, ":");
408116518Sphk
409116518Sphk	rec.mgid     = mcast->mcmember.mgid;
410116518Sphk	rec.port_gid = priv->local_gid;
411121475Sphk	rec.pkey     = cpu_to_be16(priv->pkey);
412116518Sphk
413116518Sphk	comp_mask =
414116518Sphk		IB_SA_MCMEMBER_REC_MGID		|
415116518Sphk		IB_SA_MCMEMBER_REC_PORT_GID	|
416116518Sphk		IB_SA_MCMEMBER_REC_PKEY		|
417116518Sphk		IB_SA_MCMEMBER_REC_JOIN_STATE;
418116518Sphk
419116518Sphk	if (create) {
420116518Sphk		comp_mask |=
421116518Sphk			IB_SA_MCMEMBER_REC_QKEY			|
422116518Sphk			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
423116518Sphk			IB_SA_MCMEMBER_REC_MTU			|
424116518Sphk			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
425116518Sphk			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
426116518Sphk			IB_SA_MCMEMBER_REC_RATE			|
427116518Sphk			IB_SA_MCMEMBER_REC_SL			|
428116518Sphk			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
429116518Sphk			IB_SA_MCMEMBER_REC_HOP_LIMIT;
430116518Sphk
431116518Sphk		rec.qkey	  = priv->broadcast->mcmember.qkey;
432116518Sphk		rec.mtu_selector  = IB_SA_EQ;
433116518Sphk		rec.mtu		  = priv->broadcast->mcmember.mtu;
434116518Sphk		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
435125755Sphk		rec.rate_selector = IB_SA_EQ;
436116518Sphk		rec.rate	  = priv->broadcast->mcmember.rate;
437116518Sphk		rec.sl		  = priv->broadcast->mcmember.sl;
438116518Sphk		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
439116518Sphk		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
440116518Sphk	}
441116518Sphk
442116518Sphk	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
443116518Sphk	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
444116518Sphk					 &rec, comp_mask, GFP_KERNEL,
445116518Sphk					 ipoib_mcast_join_complete, mcast);
446116518Sphk	if (IS_ERR(mcast->mc)) {
447116518Sphk		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
448116518Sphk		ret = PTR_ERR(mcast->mc);
449116518Sphk		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
450116518Sphk
451116518Sphk		mcast->backoff *= 2;
452116518Sphk		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
453116518Sphk			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
454116518Sphk
455116518Sphk		mutex_lock(&mcast_mutex);
456125538Sle		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
457116518Sphk			queue_delayed_work(ipoib_workqueue,
458116518Sphk					   &priv->mcast_task,
459116518Sphk					   mcast->backoff * HZ);
460116518Sphk		mutex_unlock(&mcast_mutex);
461116518Sphk	}
462116518Sphk}
463116518Sphk
464116518Sphkvoid ipoib_mcast_join_task(struct work_struct *work)
465133314Sphk{
466133314Sphk	struct ipoib_dev_priv *priv =
467133314Sphk		container_of(work, struct ipoib_dev_priv, mcast_task.work);
468133314Sphk	struct ifnet *dev = priv->dev;
469116518Sphk
470116518Sphk	ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);
471116518Sphk
472	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
473		return;
474
475	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
476		ipoib_warn(priv, "ib_query_gid() failed\n");
477	else
478		memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
479
480	{
481		struct ib_port_attr attr;
482
483		if (!ib_query_port(priv->ca, priv->port, &attr))
484			priv->local_lid = attr.lid;
485		else
486			ipoib_warn(priv, "ib_query_port failed\n");
487	}
488
489	if (!priv->broadcast) {
490		struct ipoib_mcast *broadcast;
491
492		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
493			return;
494
495		broadcast = ipoib_mcast_alloc(priv, 1);
496		if (!broadcast) {
497			ipoib_warn(priv, "failed to allocate broadcast group\n");
498			mutex_lock(&mcast_mutex);
499			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
500				queue_delayed_work(ipoib_workqueue,
501						   &priv->mcast_task, HZ);
502			mutex_unlock(&mcast_mutex);
503			return;
504		}
505
506		spin_lock_irq(&priv->lock);
507		memcpy(broadcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
508		       sizeof (union ib_gid));
509		priv->broadcast = broadcast;
510
511		__ipoib_mcast_add(priv, priv->broadcast);
512		spin_unlock_irq(&priv->lock);
513	}
514
515	if (priv->broadcast &&
516	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
517		if (priv->broadcast &&
518		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
519			ipoib_mcast_join(priv, priv->broadcast, 0);
520		return;
521	}
522
523	while (1) {
524		struct ipoib_mcast *mcast = NULL;
525
526		spin_lock_irq(&priv->lock);
527		list_for_each_entry(mcast, &priv->multicast_list, list) {
528			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
529			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
530			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
531				/* Found the next unjoined group */
532				break;
533			}
534		}
535		spin_unlock_irq(&priv->lock);
536
537		if (&mcast->list == &priv->multicast_list) {
538			/* All done */
539			break;
540		}
541
542		ipoib_mcast_join(priv, mcast, 1);
543		return;
544	}
545
546	spin_lock_irq(&priv->lock);
547	if (priv->broadcast)
548		priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
549	else
550		priv->mcast_mtu = priv->admin_mtu;
551	spin_unlock_irq(&priv->lock);
552
553	if (!ipoib_cm_admin_enabled(priv))
554		ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu));
555
556	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
557
558	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
559}
560
561int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv)
562{
563	ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n",
564	    priv->flags);
565
566	mutex_lock(&mcast_mutex);
567	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
568		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
569	mutex_unlock(&mcast_mutex);
570
571	return 0;
572}
573
574int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush)
575{
576
577	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
578
579	mutex_lock(&mcast_mutex);
580	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
581	cancel_delayed_work(&priv->mcast_task);
582	mutex_unlock(&mcast_mutex);
583
584	if (flush)
585		flush_workqueue(ipoib_workqueue);
586
587	return 0;
588}
589
590static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast)
591{
592	int ret = 0;
593
594	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
595		ib_sa_free_multicast(mcast->mc);
596
597	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
598		ipoib_dbg_mcast(priv, "leaving MGID %16D\n",
599				mcast->mcmember.mgid.raw, ":");
600
601		/* Remove ourselves from the multicast group */
602		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
603				      be16_to_cpu(mcast->mcmember.mlid));
604		if (ret)
605			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
606	}
607
608	return 0;
609}
610
611void
612ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb)
613{
614	struct ifnet *dev = priv->dev;
615	struct ipoib_mcast *mcast;
616
617	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)		||
618	    !priv->broadcast					||
619	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
620		++dev->if_oerrors;
621		m_freem(mb);
622		return;
623	}
624
625	mcast = __ipoib_mcast_find(priv, mgid);
626	if (!mcast) {
627		/* Let's create a new send only group now */
628		ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n",
629				mgid, ":");
630
631		mcast = ipoib_mcast_alloc(priv, 0);
632		if (!mcast) {
633			ipoib_warn(priv, "unable to allocate memory for "
634				   "multicast structure\n");
635			++dev->if_oerrors;
636			m_freem(mb);
637			goto out;
638		}
639
640		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
641		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
642		__ipoib_mcast_add(priv, mcast);
643		list_add_tail(&mcast->list, &priv->multicast_list);
644	}
645
646	if (!mcast->ah) {
647		if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) {
648			_IF_ENQUEUE(&mcast->pkt_queue, mb);
649		} else {
650			++dev->if_oerrors;
651			m_freem(mb);
652		}
653
654		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
655			ipoib_dbg_mcast(priv, "no address vector, "
656					"but multicast join already started\n");
657		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
658			ipoib_mcast_sendonly_join(mcast);
659
660		/*
661		 * If lookup completes between here and out:, don't
662		 * want to send packet twice.
663		 */
664		mcast = NULL;
665	}
666
667out:
668	if (mcast && mcast->ah)
669		ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN);
670}
671
672void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv)
673{
674	LIST_HEAD(remove_list);
675	struct ipoib_mcast *mcast, *tmcast;
676	unsigned long flags;
677
678	ipoib_dbg_mcast(priv, "flushing multicast list\n");
679
680	spin_lock_irqsave(&priv->lock, flags);
681
682	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
683		list_del(&mcast->list);
684		rb_erase(&mcast->rb_node, &priv->multicast_tree);
685		list_add_tail(&mcast->list, &remove_list);
686	}
687
688	if (priv->broadcast) {
689		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
690		list_add_tail(&priv->broadcast->list, &remove_list);
691		priv->broadcast = NULL;
692	}
693
694	spin_unlock_irqrestore(&priv->lock, flags);
695
696	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
697		ipoib_mcast_leave(priv, mcast);
698		ipoib_mcast_free(mcast);
699	}
700}
701
702static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
703				     const u8 *broadcast)
704{
705	if (addrlen != INFINIBAND_ALEN)
706		return 0;
707	/* reserved QPN, prefix, scope */
708	if (memcmp(addr, broadcast, 6))
709		return 0;
710	/* signature lower, pkey */
711	if (memcmp(addr + 7, broadcast + 7, 3))
712		return 0;
713	return 1;
714}
715
716void ipoib_mcast_restart_task(struct work_struct *work)
717{
718	struct ipoib_dev_priv *priv =
719		container_of(work, struct ipoib_dev_priv, restart_task);
720	ipoib_mcast_restart(priv);
721}
722
723void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
724{
725	struct ifnet *dev = priv->dev;
726	struct ifmultiaddr *ifma;
727	struct ipoib_mcast *mcast, *tmcast;
728	LIST_HEAD(remove_list);
729	struct ib_sa_mcmember_rec rec;
730	int addrlen;
731
732	ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n",
733	    priv->flags);
734
735	ipoib_mcast_stop_thread(priv, 0);
736
737	if_maddr_rlock(dev);
738	spin_lock(&priv->lock);
739
740	/*
741	 * Unfortunately, the networking core only gives us a list of all of
742	 * the multicast hardware addresses. We need to figure out which ones
743	 * are new and which ones have been removed
744	 */
745
746	/* Clear out the found flag */
747	list_for_each_entry(mcast, &priv->multicast_list, list)
748		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
749
750	/* Mark all of the entries that are found or don't exist */
751
752
753	TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
754		union ib_gid mgid;
755		uint8_t *addr;
756
757		if (ifma->ifma_addr->sa_family != AF_LINK)
758			continue;
759		addr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
760		addrlen = ((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen;
761		if (!ipoib_mcast_addr_is_valid(addr, addrlen,
762					       dev->if_broadcastaddr))
763			continue;
764
765		memcpy(mgid.raw, addr + 4, sizeof mgid);
766
767		mcast = __ipoib_mcast_find(priv, &mgid);
768		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
769			struct ipoib_mcast *nmcast;
770
771			/* ignore group which is directly joined by userspace */
772			if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
773			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
774				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n",
775						mgid.raw, ":");
776				continue;
777			}
778
779			/* Not found or send-only group, let's add a new entry */
780			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n",
781					mgid.raw, ":");
782
783			nmcast = ipoib_mcast_alloc(priv, 0);
784			if (!nmcast) {
785				ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
786				continue;
787			}
788
789			set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);
790
791			nmcast->mcmember.mgid = mgid;
792
793			if (mcast) {
794				/* Destroy the send only entry */
795				list_move_tail(&mcast->list, &remove_list);
796
797				rb_replace_node(&mcast->rb_node,
798						&nmcast->rb_node,
799						&priv->multicast_tree);
800			} else
801				__ipoib_mcast_add(priv, nmcast);
802
803			list_add_tail(&nmcast->list, &priv->multicast_list);
804		}
805
806		if (mcast)
807			set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
808	}
809
810	/* Remove all of the entries don't exist anymore */
811	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
812		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
813		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
814			ipoib_dbg_mcast(priv, "deleting multicast group %16D\n",
815					mcast->mcmember.mgid.raw, ":");
816
817			rb_erase(&mcast->rb_node, &priv->multicast_tree);
818
819			/* Move to the remove list */
820			list_move_tail(&mcast->list, &remove_list);
821		}
822	}
823
824	spin_unlock(&priv->lock);
825	if_maddr_runlock(dev);
826
827	/* We have to cancel outside of the spinlock */
828	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
829		ipoib_mcast_leave(mcast->priv, mcast);
830		ipoib_mcast_free(mcast);
831	}
832
833	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
834		ipoib_mcast_start_thread(priv);
835}
836
837#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
838
839struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv)
840{
841	struct ipoib_mcast_iter *iter;
842
843	iter = kmalloc(sizeof *iter, GFP_KERNEL);
844	if (!iter)
845		return NULL;
846
847	iter->priv = priv;
848	memset(iter->mgid.raw, 0, 16);
849
850	if (ipoib_mcast_iter_next(iter)) {
851		kfree(iter);
852		return NULL;
853	}
854
855	return iter;
856}
857
858int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
859{
860	struct ipoib_dev_priv *priv = iter->priv;
861	struct rb_node *n;
862	struct ipoib_mcast *mcast;
863	int ret = 1;
864
865	spin_lock_irq(&priv->lock);
866
867	n = rb_first(&priv->multicast_tree);
868
869	while (n) {
870		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
871
872		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
873			   sizeof (union ib_gid)) < 0) {
874			iter->mgid      = mcast->mcmember.mgid;
875			iter->created   = mcast->created;
876			iter->queuelen  = mcast->pkt_queue.ifq_len;
877			iter->complete  = !!mcast->ah;
878			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
879
880			ret = 0;
881
882			break;
883		}
884
885		n = rb_next(n);
886	}
887
888	spin_unlock_irq(&priv->lock);
889
890	return ret;
891}
892
893void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
894			   union ib_gid *mgid,
895			   unsigned long *created,
896			   unsigned int *queuelen,
897			   unsigned int *complete,
898			   unsigned int *send_only)
899{
900	*mgid      = iter->mgid;
901	*created   = iter->created;
902	*queuelen  = iter->queuelen;
903	*complete  = iter->complete;
904	*send_only = iter->send_only;
905}
906
907#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
908