1/*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37#include <sys/cdefs.h>
38__FBSDID("$FreeBSD$");
39
40#include "ipoib.h"
41
42#include <linux/delay.h>
43#include <linux/completion.h>
44
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Multicast debug tracing level; tracing is enabled when this is > 0. */
static int mcast_debug_level = 1;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
		 "Enable multicast debug tracing if > 0");
#endif

/*
 * Held in this file around the checks of IPOIB_MCAST_RUN that are paired
 * with queueing or cancelling priv->mcast_task.
 */
static DEFINE_MUTEX(mcast_mutex);
54
/*
 * Snapshot of one multicast group entry.  It is filled in by
 * ipoib_mcast_iter_next() and copied out by ipoib_mcast_iter_read().
 */
struct ipoib_mcast_iter {
	/* Device whose multicast tree is being walked. */
	struct ipoib_dev_priv *priv;
	/* MGID of the entry most recently copied into this iterator. */
	union ib_gid       mgid;
	/* Copy of the entry's 'created' timestamp. */
	unsigned long      created;
	/* Copy of the entry's pkt_queue.ifq_len. */
	unsigned int       queuelen;
	/* Non-zero when the entry had an address handle. */
	unsigned int       complete;
	/* Non-zero when the entry had IPOIB_MCAST_FLAG_SENDONLY set. */
	unsigned int       send_only;
};
63
64static void ipoib_mcast_free(struct ipoib_mcast *mcast)
65{
66	struct ifnet *dev = mcast->priv->dev;
67	int tx_dropped = 0;
68
69	ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n",
70			mcast->mcmember.mgid.raw, ":");
71
72	if (mcast->ah)
73		ipoib_put_ah(mcast->ah);
74
75	tx_dropped = mcast->pkt_queue.ifq_len;
76	_IF_DRAIN(&mcast->pkt_queue);	/* XXX Locking. */
77
78	if_inc_counter(dev, IFCOUNTER_OERRORS, tx_dropped);
79
80	kfree(mcast);
81}
82
83static struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv,
84					     int can_sleep)
85{
86	struct ipoib_mcast *mcast;
87
88	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
89	if (!mcast)
90		return NULL;
91
92	mcast->priv = priv;
93	mcast->created = jiffies;
94	mcast->backoff = 1;
95
96	INIT_LIST_HEAD(&mcast->list);
97	bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue));
98
99	return mcast;
100}
101
102static struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv,
103    void *mgid)
104{
105	struct rb_node *n = priv->multicast_tree.rb_node;
106
107	while (n) {
108		struct ipoib_mcast *mcast;
109		int ret;
110
111		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
112
113		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
114			     sizeof (union ib_gid));
115		if (ret < 0)
116			n = n->rb_left;
117		else if (ret > 0)
118			n = n->rb_right;
119		else
120			return mcast;
121	}
122
123	return NULL;
124}
125
126static int __ipoib_mcast_add(struct ipoib_dev_priv *priv,
127    struct ipoib_mcast *mcast)
128{
129	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
130
131	while (*n) {
132		struct ipoib_mcast *tmcast;
133		int ret;
134
135		pn = *n;
136		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
137
138		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
139			     sizeof (union ib_gid));
140		if (ret < 0)
141			n = &pn->rb_left;
142		else if (ret > 0)
143			n = &pn->rb_right;
144		else
145			return -EEXIST;
146	}
147
148	rb_link_node(&mcast->rb_node, pn, n);
149	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
150
151	return 0;
152}
153
154static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
155				   struct ib_sa_mcmember_rec *mcmember)
156{
157	struct ipoib_dev_priv *priv = mcast->priv;
158	struct ifnet *dev = priv->dev;
159	struct ipoib_ah *ah;
160	struct epoch_tracker et;
161	int ret;
162	int set_qkey = 0;
163
164	mcast->mcmember = *mcmember;
165
166	/* Set the cached Q_Key before we attach if it's the broadcast group */
167	if (!memcmp(mcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
168		    sizeof (union ib_gid))) {
169		spin_lock_irq(&priv->lock);
170		if (!priv->broadcast) {
171			spin_unlock_irq(&priv->lock);
172			return -EAGAIN;
173		}
174		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
175		spin_unlock_irq(&priv->lock);
176		priv->tx_wr.remote_qkey = priv->qkey;
177		set_qkey = 1;
178	}
179
180	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
181		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
182			ipoib_warn(priv, "multicast group %16D already attached\n",
183				   mcast->mcmember.mgid.raw, ":");
184
185			return 0;
186		}
187
188		ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid),
189					 &mcast->mcmember.mgid, set_qkey);
190		if (ret < 0) {
191			ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n",
192				   mcast->mcmember.mgid.raw, ":");
193
194			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
195			return ret;
196		}
197	}
198
199	{
200		struct ib_ah_attr av = {
201			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
202			.port_num      = priv->port,
203			.sl	       = mcast->mcmember.sl,
204			.ah_flags      = IB_AH_GRH,
205			.static_rate   = mcast->mcmember.rate,
206			.grh	       = {
207				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
208				.hop_limit     = mcast->mcmember.hop_limit,
209				.sgid_index    = 0,
210				.traffic_class = mcast->mcmember.traffic_class
211			}
212		};
213		av.grh.dgid = mcast->mcmember.mgid;
214
215		ah = ipoib_create_ah(priv, priv->pd, &av);
216		if (!ah) {
217			ipoib_warn(priv, "ib_address_create failed\n");
218		} else {
219			spin_lock_irq(&priv->lock);
220			mcast->ah = ah;
221			spin_unlock_irq(&priv->lock);
222
223			ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n",
224					mcast->mcmember.mgid.raw, ":",
225					mcast->ah->ah,
226					be16_to_cpu(mcast->mcmember.mlid),
227					mcast->mcmember.sl);
228		}
229	}
230
231	NET_EPOCH_ENTER(et);
232
233	/* actually send any queued packets */
234	while (mcast->pkt_queue.ifq_len) {
235		struct mbuf *mb;
236		_IF_DEQUEUE(&mcast->pkt_queue, mb);
237		mb->m_pkthdr.rcvif = dev;
238
239		if (dev->if_transmit(dev, mb))
240			ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
241	}
242
243	NET_EPOCH_EXIT(et);
244	return 0;
245}
246
247static int
248ipoib_mcast_sendonly_join_complete(int status,
249				   struct ib_sa_multicast *multicast)
250{
251	struct ipoib_mcast *mcast = multicast->context;
252	struct ipoib_dev_priv *priv = mcast->priv;
253
254	/* We trap for port events ourselves. */
255	if (status == -ENETRESET)
256		return 0;
257
258	if (!status)
259		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
260
261	if (status) {
262		if (mcast->logcount++ < 20)
263			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
264					mcast->mcmember.mgid.raw, ":", status);
265
266		/* Flush out any queued packets */
267		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, mcast->pkt_queue.ifq_len);
268		_IF_DRAIN(&mcast->pkt_queue);
269
270		/* Clear the busy flag so we try again */
271		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
272					    &mcast->flags);
273	}
274	return status;
275}
276
277static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
278{
279	struct ipoib_dev_priv *priv = mcast->priv;
280	struct ib_sa_mcmember_rec rec = {
281#if 0				/* Some SMs don't support send-only yet */
282		.join_state = 4
283#else
284		.join_state = 1
285#endif
286	};
287	int ret = 0;
288
289	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
290		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
291		return -ENODEV;
292	}
293
294	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
295		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
296		return -EBUSY;
297	}
298
299	rec.mgid     = mcast->mcmember.mgid;
300	rec.port_gid = priv->local_gid;
301	rec.pkey     = cpu_to_be16(priv->pkey);
302
303	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
304					 priv->port, &rec,
305					 IB_SA_MCMEMBER_REC_MGID	|
306					 IB_SA_MCMEMBER_REC_PORT_GID	|
307					 IB_SA_MCMEMBER_REC_PKEY	|
308					 IB_SA_MCMEMBER_REC_JOIN_STATE,
309					 GFP_ATOMIC,
310					 ipoib_mcast_sendonly_join_complete,
311					 mcast);
312	if (IS_ERR(mcast->mc)) {
313		ret = PTR_ERR(mcast->mc);
314		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
315		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
316			   ret);
317	} else {
318		ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n",
319				mcast->mcmember.mgid.raw, ":");
320	}
321
322	return ret;
323}
324
/*
 * Work handler embedded in priv->carrier_on_task: reports the interface
 * link state as up, provided the IB port is currently active.
 */
void ipoib_mcast_carrier_on_task(struct work_struct *work)
{
	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
						   carrier_on_task);
	struct ib_port_attr attr;

	/*
	 * NOTE(review): the comment that used to sit here described taking
	 * rtnl_lock to avoid racing with ipoib_stop(); no lock is taken in
	 * this function.  The link is only raised when the port query
	 * succeeds and the port reports IB_PORT_ACTIVE.
	 */
	if (ib_query_port(priv->ca, priv->port, &attr) ||
	    attr.state != IB_PORT_ACTIVE) {
		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
		return;
	}
	if_link_state_change(priv->dev, LINK_STATE_UP);
}
343
344static int ipoib_mcast_join_complete(int status,
345				     struct ib_sa_multicast *multicast)
346{
347	struct ipoib_mcast *mcast = multicast->context;
348	struct ipoib_dev_priv *priv = mcast->priv;
349
350	ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n",
351			mcast->mcmember.mgid.raw, ":", status);
352
353	/* We trap for port events ourselves. */
354	if (status == -ENETRESET)
355		return 0;
356
357	if (!status)
358		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
359
360	if (!status) {
361		mcast->backoff = 1;
362		mutex_lock(&mcast_mutex);
363		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
364			queue_delayed_work(ipoib_workqueue,
365					   &priv->mcast_task, 0);
366		mutex_unlock(&mcast_mutex);
367
368		/*
369		 * Defer carrier on work to ipoib_workqueue to avoid a
370		 * deadlock on rtnl_lock here.
371		 */
372		if (mcast == priv->broadcast)
373			queue_work(ipoib_workqueue, &priv->carrier_on_task);
374
375		return 0;
376	}
377
378	if (mcast->logcount++ < 20) {
379		if (status == -ETIMEDOUT || status == -EAGAIN) {
380			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
381					mcast->mcmember.mgid.raw, ":", status);
382		} else {
383			ipoib_warn(priv, "multicast join failed for %16D, status %d\n",
384				   mcast->mcmember.mgid.raw, ":", status);
385		}
386	}
387
388	mcast->backoff *= 2;
389	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
390		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
391
392	/* Clear the busy flag so we try again */
393	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
394
395	mutex_lock(&mcast_mutex);
396	spin_lock_irq(&priv->lock);
397	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
398		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
399				   mcast->backoff * HZ);
400	spin_unlock_irq(&priv->lock);
401	mutex_unlock(&mcast_mutex);
402
403	return status;
404}
405
406static void ipoib_mcast_join(struct ipoib_dev_priv *priv,
407    struct ipoib_mcast *mcast, int create)
408{
409	struct ib_sa_mcmember_rec rec = {
410		.join_state = 1
411	};
412	ib_sa_comp_mask comp_mask;
413	int ret = 0;
414
415	ipoib_dbg_mcast(priv, "joining MGID %16D\n",
416	    mcast->mcmember.mgid.raw, ":");
417
418	rec.mgid     = mcast->mcmember.mgid;
419	rec.port_gid = priv->local_gid;
420	rec.pkey     = cpu_to_be16(priv->pkey);
421
422	comp_mask =
423		IB_SA_MCMEMBER_REC_MGID		|
424		IB_SA_MCMEMBER_REC_PORT_GID	|
425		IB_SA_MCMEMBER_REC_PKEY		|
426		IB_SA_MCMEMBER_REC_JOIN_STATE;
427
428	if (create) {
429		comp_mask |=
430			IB_SA_MCMEMBER_REC_QKEY			|
431			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
432			IB_SA_MCMEMBER_REC_MTU			|
433			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
434			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
435			IB_SA_MCMEMBER_REC_RATE			|
436			IB_SA_MCMEMBER_REC_SL			|
437			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
438			IB_SA_MCMEMBER_REC_HOP_LIMIT;
439
440		rec.qkey	  = priv->broadcast->mcmember.qkey;
441		rec.mtu_selector  = IB_SA_EQ;
442		rec.mtu		  = priv->broadcast->mcmember.mtu;
443		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
444		rec.rate_selector = IB_SA_EQ;
445		rec.rate	  = priv->broadcast->mcmember.rate;
446		rec.sl		  = priv->broadcast->mcmember.sl;
447		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
448		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
449	}
450
451	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
452	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
453					 &rec, comp_mask, GFP_KERNEL,
454					 ipoib_mcast_join_complete, mcast);
455	if (IS_ERR(mcast->mc)) {
456		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
457		ret = PTR_ERR(mcast->mc);
458		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
459
460		mcast->backoff *= 2;
461		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
462			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
463
464		mutex_lock(&mcast_mutex);
465		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
466			queue_delayed_work(ipoib_workqueue,
467					   &priv->mcast_task,
468					   mcast->backoff * HZ);
469		mutex_unlock(&mcast_mutex);
470	}
471}
472
473void ipoib_mcast_join_task(struct work_struct *work)
474{
475	struct ipoib_dev_priv *priv =
476		container_of(work, struct ipoib_dev_priv, mcast_task.work);
477	struct ifnet *dev = priv->dev;
478	struct ib_port_attr attr;
479
480	ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);
481
482	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
483		return;
484
485	if (ib_query_port(priv->ca, priv->port, &attr) ||
486            attr.state != IB_PORT_ACTIVE) {
487		ipoib_dbg(priv, "%s: port state is not ACTIVE (state = %d) suspend task.\n",
488                          __func__, attr.state);
489		return;
490	}
491
492	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
493		ipoib_warn(priv, "ib_query_gid() failed\n");
494	else
495		memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
496
497	{
498		struct ib_port_attr attr;
499
500		if (!ib_query_port(priv->ca, priv->port, &attr))
501			priv->local_lid = attr.lid;
502		else
503			ipoib_warn(priv, "ib_query_port failed\n");
504	}
505
506	if (!priv->broadcast) {
507		struct ipoib_mcast *broadcast;
508
509		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
510			return;
511
512		broadcast = ipoib_mcast_alloc(priv, 1);
513		if (!broadcast) {
514			ipoib_warn(priv, "failed to allocate broadcast group\n");
515			mutex_lock(&mcast_mutex);
516			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
517				queue_delayed_work(ipoib_workqueue,
518						   &priv->mcast_task, HZ);
519			mutex_unlock(&mcast_mutex);
520			return;
521		}
522
523		spin_lock_irq(&priv->lock);
524		memcpy(broadcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
525		       sizeof (union ib_gid));
526		priv->broadcast = broadcast;
527
528		__ipoib_mcast_add(priv, priv->broadcast);
529		spin_unlock_irq(&priv->lock);
530	}
531
532	if (priv->broadcast &&
533	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
534		if (priv->broadcast &&
535		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
536			ipoib_mcast_join(priv, priv->broadcast, 0);
537		return;
538	}
539
540	while (1) {
541		struct ipoib_mcast *mcast = NULL;
542
543		spin_lock_irq(&priv->lock);
544		list_for_each_entry(mcast, &priv->multicast_list, list) {
545			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
546			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
547			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
548				/* Found the next unjoined group */
549				break;
550			}
551		}
552		spin_unlock_irq(&priv->lock);
553
554		if (&mcast->list == &priv->multicast_list) {
555			/* All done */
556			break;
557		}
558
559		ipoib_mcast_join(priv, mcast, 1);
560		return;
561	}
562
563	spin_lock_irq(&priv->lock);
564	if (priv->broadcast)
565		priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
566	else
567		priv->mcast_mtu = priv->admin_mtu;
568	spin_unlock_irq(&priv->lock);
569
570	if (!ipoib_cm_admin_enabled(priv))
571		ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu),
572		    true);
573
574	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
575
576	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
577}
578
579int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv)
580{
581	ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n",
582	    priv->flags);
583
584	mutex_lock(&mcast_mutex);
585	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
586		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
587	mutex_unlock(&mcast_mutex);
588
589	return 0;
590}
591
592int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush)
593{
594
595	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
596
597	mutex_lock(&mcast_mutex);
598	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
599	cancel_delayed_work(&priv->mcast_task);
600	mutex_unlock(&mcast_mutex);
601
602	if (flush)
603		flush_workqueue(ipoib_workqueue);
604
605	return 0;
606}
607
608static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast)
609{
610	int ret = 0;
611
612	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
613		ib_sa_free_multicast(mcast->mc);
614
615	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
616		ipoib_dbg_mcast(priv, "leaving MGID %16D\n",
617				mcast->mcmember.mgid.raw, ":");
618
619		/* Remove ourselves from the multicast group */
620		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
621				      be16_to_cpu(mcast->mcmember.mlid));
622		if (ret)
623			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
624	}
625
626	return 0;
627}
628
629void
630ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb)
631{
632	struct ifnet *dev = priv->dev;
633	struct ipoib_mcast *mcast;
634
635	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)		||
636	    !priv->broadcast					||
637	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
638		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
639		m_freem(mb);
640		return;
641	}
642
643	mcast = __ipoib_mcast_find(priv, mgid);
644	if (!mcast) {
645		/* Let's create a new send only group now */
646		ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n",
647				mgid, ":");
648
649		mcast = ipoib_mcast_alloc(priv, 0);
650		if (!mcast) {
651			ipoib_warn(priv, "unable to allocate memory for "
652				   "multicast structure\n");
653			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
654			m_freem(mb);
655			goto out;
656		}
657
658		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
659		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
660		__ipoib_mcast_add(priv, mcast);
661		list_add_tail(&mcast->list, &priv->multicast_list);
662	}
663
664	if (!mcast->ah) {
665		if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) {
666			_IF_ENQUEUE(&mcast->pkt_queue, mb);
667		} else {
668			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
669			m_freem(mb);
670		}
671
672		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
673			ipoib_dbg_mcast(priv, "no address vector, "
674					"but multicast join already started\n");
675		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
676			ipoib_mcast_sendonly_join(mcast);
677
678		/*
679		 * If lookup completes between here and out:, don't
680		 * want to send packet twice.
681		 */
682		mcast = NULL;
683	}
684
685out:
686	if (mcast && mcast->ah)
687		ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN);
688}
689
690void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv)
691{
692	LIST_HEAD(remove_list);
693	struct ipoib_mcast *mcast, *tmcast;
694	unsigned long flags;
695
696	ipoib_dbg_mcast(priv, "flushing multicast list\n");
697
698	spin_lock_irqsave(&priv->lock, flags);
699
700	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
701		list_del(&mcast->list);
702		rb_erase(&mcast->rb_node, &priv->multicast_tree);
703		list_add_tail(&mcast->list, &remove_list);
704	}
705
706	if (priv->broadcast) {
707		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
708		list_add_tail(&priv->broadcast->list, &remove_list);
709		priv->broadcast = NULL;
710	}
711
712	spin_unlock_irqrestore(&priv->lock, flags);
713
714	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
715		ipoib_mcast_leave(priv, mcast);
716		ipoib_mcast_free(mcast);
717	}
718}
719
720static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
721				     const u8 *broadcast)
722{
723	if (addrlen != INFINIBAND_ALEN)
724		return 0;
725	/* reserved QPN, prefix, scope */
726	if (memcmp(addr, broadcast, 6))
727		return 0;
728	/* signature lower, pkey */
729	if (memcmp(addr + 7, broadcast + 7, 3))
730		return 0;
731	return 1;
732}
733
734void ipoib_mcast_restart_task(struct work_struct *work)
735{
736	struct ipoib_dev_priv *priv =
737		container_of(work, struct ipoib_dev_priv, restart_task);
738	ipoib_mcast_restart(priv);
739}
740
/*
 * Argument block handed to ipoib_process_maddr() through
 * if_foreach_llmaddr() by ipoib_mcast_restart().
 */
struct ipoib_mcast_ctx {
	/* Device whose multicast entries are being reconciled. */
	struct ipoib_dev_priv *priv;
	/* Entries to be left and freed once the walk is over. */
	struct list_head remove_list;
};
745
746static u_int
747ipoib_process_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
748{
749	struct ipoib_mcast_ctx *ctx = arg;
750	struct ipoib_dev_priv *priv = ctx->priv;
751	struct ipoib_mcast *mcast;
752	struct ib_sa_mcmember_rec rec;
753	union ib_gid mgid;
754	uint8_t *addr;
755	int addrlen;
756
757	addr = LLADDR(sdl);
758	addrlen = sdl->sdl_alen;
759	if (!ipoib_mcast_addr_is_valid(addr, addrlen,
760	    priv->dev->if_broadcastaddr))
761		return (0);
762
763	memcpy(mgid.raw, addr + 4, sizeof mgid);
764
765	mcast = __ipoib_mcast_find(priv, &mgid);
766	if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
767		struct ipoib_mcast *nmcast;
768
769		/* ignore group which is directly joined by userspace */
770		if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
771		    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
772			ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n",
773					mgid.raw, ":");
774			return (0);
775		}
776
777		/* Not found or send-only group, let's add a new entry */
778		ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n",
779				mgid.raw, ":");
780
781		nmcast = ipoib_mcast_alloc(priv, 0);
782		if (!nmcast) {
783			ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
784			return (0);
785		}
786
787		set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);
788
789		nmcast->mcmember.mgid = mgid;
790
791		if (mcast) {
792			/* Destroy the send only entry */
793			list_move_tail(&mcast->list, &ctx->remove_list);
794
795			rb_replace_node(&mcast->rb_node,
796					&nmcast->rb_node,
797					&priv->multicast_tree);
798		} else
799			__ipoib_mcast_add(priv, nmcast);
800
801		list_add_tail(&nmcast->list, &priv->multicast_list);
802	}
803
804	if (mcast)
805		set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
806
807	return (1);
808}
809
810void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
811{
812	struct ipoib_mcast_ctx ctx = { priv,
813	    { &ctx.remove_list, &ctx.remove_list }};
814	struct ifnet *dev = priv->dev;
815	struct ipoib_mcast *mcast, *tmcast;
816
817	ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n",
818	    priv->flags);
819
820	ipoib_mcast_stop_thread(priv, 0);
821
822	spin_lock(&priv->lock);
823
824	/*
825	 * Unfortunately, the networking core only gives us a list of all of
826	 * the multicast hardware addresses. We need to figure out which ones
827	 * are new and which ones have been removed
828	 */
829
830	/* Clear out the found flag */
831	list_for_each_entry(mcast, &priv->multicast_list, list)
832		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
833
834	/* Mark all of the entries that are found or don't exist */
835	ctx.priv = priv;
836	if_foreach_llmaddr(dev, ipoib_process_maddr, &ctx);
837
838	/* Remove all of the entries don't exist anymore */
839	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
840		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
841		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
842			ipoib_dbg_mcast(priv, "deleting multicast group %16D\n",
843					mcast->mcmember.mgid.raw, ":");
844
845			rb_erase(&mcast->rb_node, &priv->multicast_tree);
846
847			/* Move to the remove list */
848			list_move_tail(&mcast->list, &ctx.remove_list);
849		}
850	}
851
852	spin_unlock(&priv->lock);
853
854	/* We have to cancel outside of the spinlock */
855	list_for_each_entry_safe(mcast, tmcast, &ctx.remove_list, list) {
856		ipoib_mcast_leave(mcast->priv, mcast);
857		ipoib_mcast_free(mcast);
858	}
859
860	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
861		ipoib_mcast_start_thread(priv);
862}
863
864#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
865
866struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv)
867{
868	struct ipoib_mcast_iter *iter;
869
870	iter = kmalloc(sizeof *iter, GFP_KERNEL);
871	if (!iter)
872		return NULL;
873
874	iter->priv = priv;
875	memset(iter->mgid.raw, 0, 16);
876
877	if (ipoib_mcast_iter_next(iter)) {
878		kfree(iter);
879		return NULL;
880	}
881
882	return iter;
883}
884
885int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
886{
887	struct ipoib_dev_priv *priv = iter->priv;
888	struct rb_node *n;
889	struct ipoib_mcast *mcast;
890	int ret = 1;
891
892	spin_lock_irq(&priv->lock);
893
894	n = rb_first(&priv->multicast_tree);
895
896	while (n) {
897		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
898
899		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
900			   sizeof (union ib_gid)) < 0) {
901			iter->mgid      = mcast->mcmember.mgid;
902			iter->created   = mcast->created;
903			iter->queuelen  = mcast->pkt_queue.ifq_len;
904			iter->complete  = !!mcast->ah;
905			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
906
907			ret = 0;
908
909			break;
910		}
911
912		n = rb_next(n);
913	}
914
915	spin_unlock_irq(&priv->lock);
916
917	return ret;
918}
919
920void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
921			   union ib_gid *mgid,
922			   unsigned long *created,
923			   unsigned int *queuelen,
924			   unsigned int *complete,
925			   unsigned int *send_only)
926{
927	*mgid      = iter->mgid;
928	*created   = iter->created;
929	*queuelen  = iter->queuelen;
930	*complete  = iter->complete;
931	*send_only = iter->send_only;
932}
933
934#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
935