1/*
2 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
5 *
6 * This software is available to you under a choice of one of two
7 * licenses.  You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
11 *
12 *     Redistribution and use in source and binary forms, with or
13 *     without modification, are permitted provided that the following
14 *     conditions are met:
15 *
16 *      - Redistributions of source code must retain the above
17 *        copyright notice, this list of conditions and the following
18 *        disclaimer.
19 *
20 *      - Redistributions in binary form must reproduce the above
21 *        copyright notice, this list of conditions and the following
22 *        disclaimer in the documentation and/or other materials
23 *        provided with the distribution.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE.
33 */
34
35#include "ipoib.h"
36
37#include <linux/delay.h>
38#include <linux/completion.h>
39
/*
 * Held by the call sites in this file that queue or cancel
 * priv->mcast_task depending on the IPOIB_MCAST_RUN flag.
 */
static DEFINE_MUTEX(mcast_mutex);

#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/*
 * Multicast debug knob, exported as a module parameter (mode 0644,
 * default 1).  Per its description, tracing is enabled when > 0.
 * Presumably consulted by ipoib_dbg_mcast(), which is defined outside
 * this file -- confirm there.
 */
static int mcast_debug_level = 1;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level, "Enable multicast debug tracing if > 0");
#endif
49
50struct ipoib_mcast_iter {
51	struct ipoib_dev_priv *priv;
52	union ib_gid       mgid;
53	unsigned long      created;
54	unsigned int       queuelen;
55	unsigned int       complete;
56	unsigned int       send_only;
57};
58
59static void ipoib_mcast_free(struct ipoib_mcast *mcast)
60{
61	struct ifnet *dev = mcast->priv->dev;
62	int tx_dropped = 0;
63
64	ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n",
65			mcast->mcmember.mgid.raw, ":");
66
67	if (mcast->ah)
68		ipoib_put_ah(mcast->ah);
69
70	tx_dropped = mcast->pkt_queue.ifq_len;
71	_IF_DRAIN(&mcast->pkt_queue);	/* XXX Locking. */
72
73	dev->if_oerrors += tx_dropped;
74
75	kfree(mcast);
76}
77
78static struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv,
79					     int can_sleep)
80{
81	struct ipoib_mcast *mcast;
82
83	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
84	if (!mcast)
85		return NULL;
86
87	mcast->priv = priv;
88	mcast->created = jiffies;
89	mcast->backoff = 1;
90
91	INIT_LIST_HEAD(&mcast->list);
92	bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue));
93
94	return mcast;
95}
96
97static struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv,
98    void *mgid)
99{
100	struct rb_node *n = priv->multicast_tree.rb_node;
101
102	while (n) {
103		struct ipoib_mcast *mcast;
104		int ret;
105
106		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
107
108		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
109			     sizeof (union ib_gid));
110		if (ret < 0)
111			n = n->rb_left;
112		else if (ret > 0)
113			n = n->rb_right;
114		else
115			return mcast;
116	}
117
118	return NULL;
119}
120
121static int __ipoib_mcast_add(struct ipoib_dev_priv *priv,
122    struct ipoib_mcast *mcast)
123{
124	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
125
126	while (*n) {
127		struct ipoib_mcast *tmcast;
128		int ret;
129
130		pn = *n;
131		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
132
133		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
134			     sizeof (union ib_gid));
135		if (ret < 0)
136			n = &pn->rb_left;
137		else if (ret > 0)
138			n = &pn->rb_right;
139		else
140			return -EEXIST;
141	}
142
143	rb_link_node(&mcast->rb_node, pn, n);
144	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
145
146	return 0;
147}
148
149static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
150				   struct ib_sa_mcmember_rec *mcmember)
151{
152	struct ipoib_dev_priv *priv = mcast->priv;
153	struct ifnet *dev = priv->dev;
154	struct ipoib_ah *ah;
155	int ret;
156	int set_qkey = 0;
157
158	mcast->mcmember = *mcmember;
159
160	/* Set the cached Q_Key before we attach if it's the broadcast group */
161	if (!memcmp(mcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
162		    sizeof (union ib_gid))) {
163		spin_lock_irq(&priv->lock);
164		if (!priv->broadcast) {
165			spin_unlock_irq(&priv->lock);
166			return -EAGAIN;
167		}
168		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
169		spin_unlock_irq(&priv->lock);
170		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
171		set_qkey = 1;
172	}
173
174	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
175		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
176			ipoib_warn(priv, "multicast group %16D already attached\n",
177				   mcast->mcmember.mgid.raw, ":");
178
179			return 0;
180		}
181
182		ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid),
183					 &mcast->mcmember.mgid, set_qkey);
184		if (ret < 0) {
185			ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n",
186				   mcast->mcmember.mgid.raw, ":");
187
188			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
189			return ret;
190		}
191	}
192
193	{
194		struct ib_ah_attr av = {
195			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
196			.port_num      = priv->port,
197			.sl	       = mcast->mcmember.sl,
198			.ah_flags      = IB_AH_GRH,
199			.static_rate   = mcast->mcmember.rate,
200			.grh	       = {
201				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
202				.hop_limit     = mcast->mcmember.hop_limit,
203				.sgid_index    = 0,
204				.traffic_class = mcast->mcmember.traffic_class
205			}
206		};
207		av.grh.dgid = mcast->mcmember.mgid;
208
209		ah = ipoib_create_ah(priv, priv->pd, &av);
210		if (!ah) {
211			ipoib_warn(priv, "ib_address_create failed\n");
212		} else {
213			spin_lock_irq(&priv->lock);
214			mcast->ah = ah;
215			spin_unlock_irq(&priv->lock);
216
217			ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n",
218					mcast->mcmember.mgid.raw, ":",
219					mcast->ah->ah,
220					be16_to_cpu(mcast->mcmember.mlid),
221					mcast->mcmember.sl);
222		}
223	}
224
225	/* actually send any queued packets */
226	while (mcast->pkt_queue.ifq_len) {
227		struct mbuf *mb;
228		_IF_DEQUEUE(&mcast->pkt_queue, mb);
229		mb->m_pkthdr.rcvif = dev;
230
231		if (dev->if_transmit(dev, mb))
232			ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
233	}
234
235	return 0;
236}
237
238static int
239ipoib_mcast_sendonly_join_complete(int status,
240				   struct ib_sa_multicast *multicast)
241{
242	struct ipoib_mcast *mcast = multicast->context;
243	struct ipoib_dev_priv *priv = mcast->priv;
244
245	/* We trap for port events ourselves. */
246	if (status == -ENETRESET)
247		return 0;
248
249	if (!status)
250		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
251
252	if (status) {
253		if (mcast->logcount++ < 20)
254			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
255					mcast->mcmember.mgid.raw, ":", status);
256
257		/* Flush out any queued packets */
258		priv->dev->if_oerrors += mcast->pkt_queue.ifq_len;
259		_IF_DRAIN(&mcast->pkt_queue);
260
261		/* Clear the busy flag so we try again */
262		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
263					    &mcast->flags);
264	}
265	return status;
266}
267
268static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
269{
270	struct ipoib_dev_priv *priv = mcast->priv;
271	struct ib_sa_mcmember_rec rec = {
272#if 0				/* Some SMs don't support send-only yet */
273		.join_state = 4
274#else
275		.join_state = 1
276#endif
277	};
278	int ret = 0;
279
280	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
281		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
282		return -ENODEV;
283	}
284
285	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
286		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
287		return -EBUSY;
288	}
289
290	rec.mgid     = mcast->mcmember.mgid;
291	rec.port_gid = priv->local_gid;
292	rec.pkey     = cpu_to_be16(priv->pkey);
293
294	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
295					 priv->port, &rec,
296					 IB_SA_MCMEMBER_REC_MGID	|
297					 IB_SA_MCMEMBER_REC_PORT_GID	|
298					 IB_SA_MCMEMBER_REC_PKEY	|
299					 IB_SA_MCMEMBER_REC_JOIN_STATE,
300					 GFP_ATOMIC,
301					 ipoib_mcast_sendonly_join_complete,
302					 mcast);
303	if (IS_ERR(mcast->mc)) {
304		ret = PTR_ERR(mcast->mc);
305		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
306		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
307			   ret);
308	} else {
309		ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n",
310				mcast->mcmember.mgid.raw, ":");
311	}
312
313	return ret;
314}
315
316void ipoib_mcast_carrier_on_task(struct work_struct *work)
317{
318	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
319						   carrier_on_task);
320	struct ib_port_attr attr;
321
322	/*
323	 * Take rtnl_lock to avoid racing with ipoib_stop() and
324	 * turning the carrier back on while a device is being
325	 * removed.
326	 */
327	if (ib_query_port(priv->ca, priv->port, &attr) ||
328	    attr.state != IB_PORT_ACTIVE) {
329		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
330		return;
331	}
332	if_link_state_change(priv->dev, LINK_STATE_UP);
333}
334
335static int ipoib_mcast_join_complete(int status,
336				     struct ib_sa_multicast *multicast)
337{
338	struct ipoib_mcast *mcast = multicast->context;
339	struct ipoib_dev_priv *priv = mcast->priv;
340
341	ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n",
342			mcast->mcmember.mgid.raw, ":", status);
343
344	/* We trap for port events ourselves. */
345	if (status == -ENETRESET)
346		return 0;
347
348	if (!status)
349		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
350
351	if (!status) {
352		mcast->backoff = 1;
353		mutex_lock(&mcast_mutex);
354		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
355			queue_delayed_work(ipoib_workqueue,
356					   &priv->mcast_task, 0);
357		mutex_unlock(&mcast_mutex);
358
359		/*
360		 * Defer carrier on work to ipoib_workqueue to avoid a
361		 * deadlock on rtnl_lock here.
362		 */
363		if (mcast == priv->broadcast)
364			queue_work(ipoib_workqueue, &priv->carrier_on_task);
365
366		return 0;
367	}
368
369	if (mcast->logcount++ < 20) {
370		if (status == -ETIMEDOUT || status == -EAGAIN) {
371			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
372					mcast->mcmember.mgid.raw, ":", status);
373		} else {
374			ipoib_warn(priv, "multicast join failed for %16D, status %d\n",
375				   mcast->mcmember.mgid.raw, ":", status);
376		}
377	}
378
379	mcast->backoff *= 2;
380	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
381		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
382
383	/* Clear the busy flag so we try again */
384	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
385
386	mutex_lock(&mcast_mutex);
387	spin_lock_irq(&priv->lock);
388	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
389		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
390				   mcast->backoff * HZ);
391	spin_unlock_irq(&priv->lock);
392	mutex_unlock(&mcast_mutex);
393
394	return status;
395}
396
397static void ipoib_mcast_join(struct ipoib_dev_priv *priv,
398    struct ipoib_mcast *mcast, int create)
399{
400	struct ib_sa_mcmember_rec rec = {
401		.join_state = 1
402	};
403	ib_sa_comp_mask comp_mask;
404	int ret = 0;
405
406	ipoib_dbg_mcast(priv, "joining MGID %16D\n",
407	    mcast->mcmember.mgid.raw, ":");
408
409	rec.mgid     = mcast->mcmember.mgid;
410	rec.port_gid = priv->local_gid;
411	rec.pkey     = cpu_to_be16(priv->pkey);
412
413	comp_mask =
414		IB_SA_MCMEMBER_REC_MGID		|
415		IB_SA_MCMEMBER_REC_PORT_GID	|
416		IB_SA_MCMEMBER_REC_PKEY		|
417		IB_SA_MCMEMBER_REC_JOIN_STATE;
418
419	if (create) {
420		comp_mask |=
421			IB_SA_MCMEMBER_REC_QKEY			|
422			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
423			IB_SA_MCMEMBER_REC_MTU			|
424			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
425			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
426			IB_SA_MCMEMBER_REC_RATE			|
427			IB_SA_MCMEMBER_REC_SL			|
428			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
429			IB_SA_MCMEMBER_REC_HOP_LIMIT;
430
431		rec.qkey	  = priv->broadcast->mcmember.qkey;
432		rec.mtu_selector  = IB_SA_EQ;
433		rec.mtu		  = priv->broadcast->mcmember.mtu;
434		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
435		rec.rate_selector = IB_SA_EQ;
436		rec.rate	  = priv->broadcast->mcmember.rate;
437		rec.sl		  = priv->broadcast->mcmember.sl;
438		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
439		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
440	}
441
442	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
443	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
444					 &rec, comp_mask, GFP_KERNEL,
445					 ipoib_mcast_join_complete, mcast);
446	if (IS_ERR(mcast->mc)) {
447		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
448		ret = PTR_ERR(mcast->mc);
449		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
450
451		mcast->backoff *= 2;
452		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
453			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
454
455		mutex_lock(&mcast_mutex);
456		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
457			queue_delayed_work(ipoib_workqueue,
458					   &priv->mcast_task,
459					   mcast->backoff * HZ);
460		mutex_unlock(&mcast_mutex);
461	}
462}
463
464void ipoib_mcast_join_task(struct work_struct *work)
465{
466	struct ipoib_dev_priv *priv =
467		container_of(work, struct ipoib_dev_priv, mcast_task.work);
468	struct ifnet *dev = priv->dev;
469
470	ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);
471
472	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
473		return;
474
475	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
476		ipoib_warn(priv, "ib_query_gid() failed\n");
477	else
478		memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
479
480	{
481		struct ib_port_attr attr;
482
483		if (!ib_query_port(priv->ca, priv->port, &attr))
484			priv->local_lid = attr.lid;
485		else
486			ipoib_warn(priv, "ib_query_port failed\n");
487	}
488
489	if (!priv->broadcast) {
490		struct ipoib_mcast *broadcast;
491
492		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
493			return;
494
495		broadcast = ipoib_mcast_alloc(priv, 1);
496		if (!broadcast) {
497			ipoib_warn(priv, "failed to allocate broadcast group\n");
498			mutex_lock(&mcast_mutex);
499			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
500				queue_delayed_work(ipoib_workqueue,
501						   &priv->mcast_task, HZ);
502			mutex_unlock(&mcast_mutex);
503			return;
504		}
505
506		spin_lock_irq(&priv->lock);
507		memcpy(broadcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
508		       sizeof (union ib_gid));
509		priv->broadcast = broadcast;
510
511		__ipoib_mcast_add(priv, priv->broadcast);
512		spin_unlock_irq(&priv->lock);
513	}
514
515	if (priv->broadcast &&
516	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
517		if (priv->broadcast &&
518		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
519			ipoib_mcast_join(priv, priv->broadcast, 0);
520		return;
521	}
522
523	while (1) {
524		struct ipoib_mcast *mcast = NULL;
525
526		spin_lock_irq(&priv->lock);
527		list_for_each_entry(mcast, &priv->multicast_list, list) {
528			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
529			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
530			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
531				/* Found the next unjoined group */
532				break;
533			}
534		}
535		spin_unlock_irq(&priv->lock);
536
537		if (&mcast->list == &priv->multicast_list) {
538			/* All done */
539			break;
540		}
541
542		ipoib_mcast_join(priv, mcast, 1);
543		return;
544	}
545
546	spin_lock_irq(&priv->lock);
547	if (priv->broadcast)
548		priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
549	else
550		priv->mcast_mtu = priv->admin_mtu;
551	spin_unlock_irq(&priv->lock);
552
553	if (!ipoib_cm_admin_enabled(priv))
554		ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu));
555
556	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
557
558	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
559}
560
561int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv)
562{
563	ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n",
564	    priv->flags);
565
566	mutex_lock(&mcast_mutex);
567	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
568		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
569	mutex_unlock(&mcast_mutex);
570
571	return 0;
572}
573
574int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush)
575{
576
577	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
578
579	mutex_lock(&mcast_mutex);
580	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
581	cancel_delayed_work(&priv->mcast_task);
582	mutex_unlock(&mcast_mutex);
583
584	if (flush)
585		flush_workqueue(ipoib_workqueue);
586
587	return 0;
588}
589
590static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast)
591{
592	int ret = 0;
593
594	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
595		ib_sa_free_multicast(mcast->mc);
596
597	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
598		ipoib_dbg_mcast(priv, "leaving MGID %16D\n",
599				mcast->mcmember.mgid.raw, ":");
600
601		/* Remove ourselves from the multicast group */
602		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
603				      be16_to_cpu(mcast->mcmember.mlid));
604		if (ret)
605			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
606	}
607
608	return 0;
609}
610
611void
612ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb)
613{
614	struct ifnet *dev = priv->dev;
615	struct ipoib_mcast *mcast;
616
617	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)		||
618	    !priv->broadcast					||
619	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
620		++dev->if_oerrors;
621		m_freem(mb);
622		return;
623	}
624
625	mcast = __ipoib_mcast_find(priv, mgid);
626	if (!mcast) {
627		/* Let's create a new send only group now */
628		ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n",
629				mgid, ":");
630
631		mcast = ipoib_mcast_alloc(priv, 0);
632		if (!mcast) {
633			ipoib_warn(priv, "unable to allocate memory for "
634				   "multicast structure\n");
635			++dev->if_oerrors;
636			m_freem(mb);
637			goto out;
638		}
639
640		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
641		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
642		__ipoib_mcast_add(priv, mcast);
643		list_add_tail(&mcast->list, &priv->multicast_list);
644	}
645
646	if (!mcast->ah) {
647		if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) {
648			_IF_ENQUEUE(&mcast->pkt_queue, mb);
649		} else {
650			++dev->if_oerrors;
651			m_freem(mb);
652		}
653
654		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
655			ipoib_dbg_mcast(priv, "no address vector, "
656					"but multicast join already started\n");
657		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
658			ipoib_mcast_sendonly_join(mcast);
659
660		/*
661		 * If lookup completes between here and out:, don't
662		 * want to send packet twice.
663		 */
664		mcast = NULL;
665	}
666
667out:
668	if (mcast && mcast->ah)
669		ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN);
670}
671
672void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv)
673{
674	LIST_HEAD(remove_list);
675	struct ipoib_mcast *mcast, *tmcast;
676	unsigned long flags;
677
678	ipoib_dbg_mcast(priv, "flushing multicast list\n");
679
680	spin_lock_irqsave(&priv->lock, flags);
681
682	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
683		list_del(&mcast->list);
684		rb_erase(&mcast->rb_node, &priv->multicast_tree);
685		list_add_tail(&mcast->list, &remove_list);
686	}
687
688	if (priv->broadcast) {
689		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
690		list_add_tail(&priv->broadcast->list, &remove_list);
691		priv->broadcast = NULL;
692	}
693
694	spin_unlock_irqrestore(&priv->lock, flags);
695
696	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
697		ipoib_mcast_leave(priv, mcast);
698		ipoib_mcast_free(mcast);
699	}
700}
701
702static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
703				     const u8 *broadcast)
704{
705	if (addrlen != INFINIBAND_ALEN)
706		return 0;
707	/* reserved QPN, prefix, scope */
708	if (memcmp(addr, broadcast, 6))
709		return 0;
710	/* signature lower, pkey */
711	if (memcmp(addr + 7, broadcast + 7, 3))
712		return 0;
713	return 1;
714}
715
716void ipoib_mcast_restart_task(struct work_struct *work)
717{
718	struct ipoib_dev_priv *priv =
719		container_of(work, struct ipoib_dev_priv, restart_task);
720	ipoib_mcast_restart(priv);
721}
722
723void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
724{
725	struct ifnet *dev = priv->dev;
726	struct ifmultiaddr *ifma;
727	struct ipoib_mcast *mcast, *tmcast;
728	LIST_HEAD(remove_list);
729	struct ib_sa_mcmember_rec rec;
730	int addrlen;
731
732	ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n",
733	    priv->flags);
734
735	ipoib_mcast_stop_thread(priv, 0);
736
737	if_maddr_rlock(dev);
738	spin_lock(&priv->lock);
739
740	/*
741	 * Unfortunately, the networking core only gives us a list of all of
742	 * the multicast hardware addresses. We need to figure out which ones
743	 * are new and which ones have been removed
744	 */
745
746	/* Clear out the found flag */
747	list_for_each_entry(mcast, &priv->multicast_list, list)
748		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
749
750	/* Mark all of the entries that are found or don't exist */
751
752
753	TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
754		union ib_gid mgid;
755		uint8_t *addr;
756
757		if (ifma->ifma_addr->sa_family != AF_LINK)
758			continue;
759		addr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
760		addrlen = ((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen;
761		if (!ipoib_mcast_addr_is_valid(addr, addrlen,
762					       dev->if_broadcastaddr))
763			continue;
764
765		memcpy(mgid.raw, addr + 4, sizeof mgid);
766
767		mcast = __ipoib_mcast_find(priv, &mgid);
768		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
769			struct ipoib_mcast *nmcast;
770
771			/* ignore group which is directly joined by userspace */
772			if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
773			    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
774				ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n",
775						mgid.raw, ":");
776				continue;
777			}
778
779			/* Not found or send-only group, let's add a new entry */
780			ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n",
781					mgid.raw, ":");
782
783			nmcast = ipoib_mcast_alloc(priv, 0);
784			if (!nmcast) {
785				ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
786				continue;
787			}
788
789			set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);
790
791			nmcast->mcmember.mgid = mgid;
792
793			if (mcast) {
794				/* Destroy the send only entry */
795				list_move_tail(&mcast->list, &remove_list);
796
797				rb_replace_node(&mcast->rb_node,
798						&nmcast->rb_node,
799						&priv->multicast_tree);
800			} else
801				__ipoib_mcast_add(priv, nmcast);
802
803			list_add_tail(&nmcast->list, &priv->multicast_list);
804		}
805
806		if (mcast)
807			set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
808	}
809
810	/* Remove all of the entries don't exist anymore */
811	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
812		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
813		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
814			ipoib_dbg_mcast(priv, "deleting multicast group %16D\n",
815					mcast->mcmember.mgid.raw, ":");
816
817			rb_erase(&mcast->rb_node, &priv->multicast_tree);
818
819			/* Move to the remove list */
820			list_move_tail(&mcast->list, &remove_list);
821		}
822	}
823
824	spin_unlock(&priv->lock);
825	if_maddr_runlock(dev);
826
827	/* We have to cancel outside of the spinlock */
828	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
829		ipoib_mcast_leave(mcast->priv, mcast);
830		ipoib_mcast_free(mcast);
831	}
832
833	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
834		ipoib_mcast_start_thread(priv);
835}
836
837#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
838
839struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv)
840{
841	struct ipoib_mcast_iter *iter;
842
843	iter = kmalloc(sizeof *iter, GFP_KERNEL);
844	if (!iter)
845		return NULL;
846
847	iter->priv = priv;
848	memset(iter->mgid.raw, 0, 16);
849
850	if (ipoib_mcast_iter_next(iter)) {
851		kfree(iter);
852		return NULL;
853	}
854
855	return iter;
856}
857
858int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
859{
860	struct ipoib_dev_priv *priv = iter->priv;
861	struct rb_node *n;
862	struct ipoib_mcast *mcast;
863	int ret = 1;
864
865	spin_lock_irq(&priv->lock);
866
867	n = rb_first(&priv->multicast_tree);
868
869	while (n) {
870		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
871
872		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
873			   sizeof (union ib_gid)) < 0) {
874			iter->mgid      = mcast->mcmember.mgid;
875			iter->created   = mcast->created;
876			iter->queuelen  = mcast->pkt_queue.ifq_len;
877			iter->complete  = !!mcast->ah;
878			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
879
880			ret = 0;
881
882			break;
883		}
884
885		n = rb_next(n);
886	}
887
888	spin_unlock_irq(&priv->lock);
889
890	return ret;
891}
892
893void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
894			   union ib_gid *mgid,
895			   unsigned long *created,
896			   unsigned int *queuelen,
897			   unsigned int *complete,
898			   unsigned int *send_only)
899{
900	*mgid      = iter->mgid;
901	*created   = iter->created;
902	*queuelen  = iter->queuelen;
903	*complete  = iter->complete;
904	*send_only = iter->send_only;
905}
906
907#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
908