1/*-
2 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3 *
4 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
5 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
6 * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 */
36
37#include <sys/cdefs.h>
38#include "ipoib.h"
39
40#include <linux/delay.h>
41#include <linux/completion.h>
42
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/* Multicast debug verbosity, exported as a module parameter (mode 0644). */
static int mcast_debug_level = 1;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level, "Enable multicast debug tracing if > 0");
#endif

/*
 * Taken in this file around queueing/cancelling the delayed join task
 * (priv->mcast_task) together with the IPOIB_MCAST_RUN checks made next
 * to those operations.
 */
static DEFINE_MUTEX(mcast_mutex);
52
53struct ipoib_mcast_iter {
54	struct ipoib_dev_priv *priv;
55	union ib_gid       mgid;
56	unsigned long      created;
57	unsigned int       queuelen;
58	unsigned int       complete;
59	unsigned int       send_only;
60};
61
62static void ipoib_mcast_free(struct ipoib_mcast *mcast)
63{
64	if_t dev = mcast->priv->dev;
65	int tx_dropped = 0;
66
67	ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n",
68			mcast->mcmember.mgid.raw, ":");
69
70	if (mcast->ah)
71		ipoib_put_ah(mcast->ah);
72
73	tx_dropped = mcast->pkt_queue.ifq_len;
74	_IF_DRAIN(&mcast->pkt_queue);	/* XXX Locking. */
75
76	if_inc_counter(dev, IFCOUNTER_OERRORS, tx_dropped);
77
78	kfree(mcast);
79}
80
81static struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv,
82					     int can_sleep)
83{
84	struct ipoib_mcast *mcast;
85
86	mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
87	if (!mcast)
88		return NULL;
89
90	mcast->priv = priv;
91	mcast->created = jiffies;
92	mcast->backoff = 1;
93
94	INIT_LIST_HEAD(&mcast->list);
95	bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue));
96
97	return mcast;
98}
99
100static struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv,
101    void *mgid)
102{
103	struct rb_node *n = priv->multicast_tree.rb_node;
104
105	while (n) {
106		struct ipoib_mcast *mcast;
107		int ret;
108
109		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
110
111		ret = memcmp(mgid, mcast->mcmember.mgid.raw,
112			     sizeof (union ib_gid));
113		if (ret < 0)
114			n = n->rb_left;
115		else if (ret > 0)
116			n = n->rb_right;
117		else
118			return mcast;
119	}
120
121	return NULL;
122}
123
124static int __ipoib_mcast_add(struct ipoib_dev_priv *priv,
125    struct ipoib_mcast *mcast)
126{
127	struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
128
129	while (*n) {
130		struct ipoib_mcast *tmcast;
131		int ret;
132
133		pn = *n;
134		tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
135
136		ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
137			     sizeof (union ib_gid));
138		if (ret < 0)
139			n = &pn->rb_left;
140		else if (ret > 0)
141			n = &pn->rb_right;
142		else
143			return -EEXIST;
144	}
145
146	rb_link_node(&mcast->rb_node, pn, n);
147	rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
148
149	return 0;
150}
151
152static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
153				   struct ib_sa_mcmember_rec *mcmember)
154{
155	struct ipoib_dev_priv *priv = mcast->priv;
156	if_t dev = priv->dev;
157	struct ipoib_ah *ah;
158	struct epoch_tracker et;
159	int ret;
160	int set_qkey = 0;
161
162	mcast->mcmember = *mcmember;
163
164	/* Set the cached Q_Key before we attach if it's the broadcast group */
165	if (!memcmp(mcast->mcmember.mgid.raw, if_getbroadcastaddr(dev) + 4,
166		    sizeof (union ib_gid))) {
167		spin_lock_irq(&priv->lock);
168		if (!priv->broadcast) {
169			spin_unlock_irq(&priv->lock);
170			return -EAGAIN;
171		}
172		priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
173		spin_unlock_irq(&priv->lock);
174		priv->tx_wr.remote_qkey = priv->qkey;
175		set_qkey = 1;
176	}
177
178	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
179		if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
180			ipoib_warn(priv, "multicast group %16D already attached\n",
181				   mcast->mcmember.mgid.raw, ":");
182
183			return 0;
184		}
185
186		ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid),
187					 &mcast->mcmember.mgid, set_qkey);
188		if (ret < 0) {
189			ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n",
190				   mcast->mcmember.mgid.raw, ":");
191
192			clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
193			return ret;
194		}
195	}
196
197	{
198		struct ib_ah_attr av = {
199			.dlid	       = be16_to_cpu(mcast->mcmember.mlid),
200			.port_num      = priv->port,
201			.sl	       = mcast->mcmember.sl,
202			.ah_flags      = IB_AH_GRH,
203			.static_rate   = mcast->mcmember.rate,
204			.grh	       = {
205				.flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
206				.hop_limit     = mcast->mcmember.hop_limit,
207				.sgid_index    = 0,
208				.traffic_class = mcast->mcmember.traffic_class
209			}
210		};
211		av.grh.dgid = mcast->mcmember.mgid;
212
213		ah = ipoib_create_ah(priv, priv->pd, &av);
214		if (!ah) {
215			ipoib_warn(priv, "ib_address_create failed\n");
216		} else {
217			spin_lock_irq(&priv->lock);
218			mcast->ah = ah;
219			spin_unlock_irq(&priv->lock);
220
221			ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n",
222					mcast->mcmember.mgid.raw, ":",
223					mcast->ah->ah,
224					be16_to_cpu(mcast->mcmember.mlid),
225					mcast->mcmember.sl);
226		}
227	}
228
229	NET_EPOCH_ENTER(et);
230
231	/* actually send any queued packets */
232	while (mcast->pkt_queue.ifq_len) {
233		struct mbuf *mb;
234		_IF_DEQUEUE(&mcast->pkt_queue, mb);
235		mb->m_pkthdr.rcvif = dev;
236
237		if (if_transmit(dev, mb))
238			ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
239	}
240
241	NET_EPOCH_EXIT(et);
242	return 0;
243}
244
245static int
246ipoib_mcast_sendonly_join_complete(int status,
247				   struct ib_sa_multicast *multicast)
248{
249	struct ipoib_mcast *mcast = multicast->context;
250	struct ipoib_dev_priv *priv = mcast->priv;
251
252	/* We trap for port events ourselves. */
253	if (status == -ENETRESET)
254		return 0;
255
256	if (!status)
257		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
258
259	if (status) {
260		if (mcast->logcount++ < 20)
261			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
262					mcast->mcmember.mgid.raw, ":", status);
263
264		/* Flush out any queued packets */
265		if_inc_counter(priv->dev, IFCOUNTER_OERRORS, mcast->pkt_queue.ifq_len);
266		_IF_DRAIN(&mcast->pkt_queue);
267
268		/* Clear the busy flag so we try again */
269		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
270					    &mcast->flags);
271	}
272	return status;
273}
274
275static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
276{
277	struct ipoib_dev_priv *priv = mcast->priv;
278	struct ib_sa_mcmember_rec rec = {
279#if 0				/* Some SMs don't support send-only yet */
280		.join_state = 4
281#else
282		.join_state = 1
283#endif
284	};
285	int ret = 0;
286
287	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
288		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
289		return -ENODEV;
290	}
291
292	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
293		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
294		return -EBUSY;
295	}
296
297	rec.mgid     = mcast->mcmember.mgid;
298	rec.port_gid = priv->local_gid;
299	rec.pkey     = cpu_to_be16(priv->pkey);
300
301	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
302					 priv->port, &rec,
303					 IB_SA_MCMEMBER_REC_MGID	|
304					 IB_SA_MCMEMBER_REC_PORT_GID	|
305					 IB_SA_MCMEMBER_REC_PKEY	|
306					 IB_SA_MCMEMBER_REC_JOIN_STATE,
307					 GFP_ATOMIC,
308					 ipoib_mcast_sendonly_join_complete,
309					 mcast);
310	if (IS_ERR(mcast->mc)) {
311		ret = PTR_ERR(mcast->mc);
312		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
313		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
314			   ret);
315	} else {
316		ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n",
317				mcast->mcmember.mgid.raw, ":");
318	}
319
320	return ret;
321}
322
323void ipoib_mcast_carrier_on_task(struct work_struct *work)
324{
325	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
326						   carrier_on_task);
327	struct ib_port_attr attr;
328
329	/*
330	 * Take rtnl_lock to avoid racing with ipoib_stop() and
331	 * turning the carrier back on while a device is being
332	 * removed.
333	 */
334	if (ib_query_port(priv->ca, priv->port, &attr) ||
335	    attr.state != IB_PORT_ACTIVE) {
336		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
337		return;
338	}
339	if_link_state_change(priv->dev, LINK_STATE_UP);
340}
341
342static int ipoib_mcast_join_complete(int status,
343				     struct ib_sa_multicast *multicast)
344{
345	struct ipoib_mcast *mcast = multicast->context;
346	struct ipoib_dev_priv *priv = mcast->priv;
347
348	ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n",
349			mcast->mcmember.mgid.raw, ":", status);
350
351	/* We trap for port events ourselves. */
352	if (status == -ENETRESET)
353		return 0;
354
355	if (!status)
356		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
357
358	if (!status) {
359		mcast->backoff = 1;
360		mutex_lock(&mcast_mutex);
361		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
362			queue_delayed_work(ipoib_workqueue,
363					   &priv->mcast_task, 0);
364		mutex_unlock(&mcast_mutex);
365
366		/*
367		 * Defer carrier on work to ipoib_workqueue to avoid a
368		 * deadlock on rtnl_lock here.
369		 */
370		if (mcast == priv->broadcast)
371			queue_work(ipoib_workqueue, &priv->carrier_on_task);
372
373		return 0;
374	}
375
376	if (mcast->logcount++ < 20) {
377		if (status == -ETIMEDOUT || status == -EAGAIN) {
378			ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
379					mcast->mcmember.mgid.raw, ":", status);
380		} else {
381			ipoib_warn(priv, "multicast join failed for %16D, status %d\n",
382				   mcast->mcmember.mgid.raw, ":", status);
383		}
384	}
385
386	mcast->backoff *= 2;
387	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
388		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
389
390	/* Clear the busy flag so we try again */
391	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
392
393	mutex_lock(&mcast_mutex);
394	spin_lock_irq(&priv->lock);
395	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
396		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
397				   mcast->backoff * HZ);
398	spin_unlock_irq(&priv->lock);
399	mutex_unlock(&mcast_mutex);
400
401	return status;
402}
403
404static void ipoib_mcast_join(struct ipoib_dev_priv *priv,
405    struct ipoib_mcast *mcast, int create)
406{
407	struct ib_sa_mcmember_rec rec = {
408		.join_state = 1
409	};
410	ib_sa_comp_mask comp_mask;
411	int ret = 0;
412
413	ipoib_dbg_mcast(priv, "joining MGID %16D\n",
414	    mcast->mcmember.mgid.raw, ":");
415
416	rec.mgid     = mcast->mcmember.mgid;
417	rec.port_gid = priv->local_gid;
418	rec.pkey     = cpu_to_be16(priv->pkey);
419
420	comp_mask =
421		IB_SA_MCMEMBER_REC_MGID		|
422		IB_SA_MCMEMBER_REC_PORT_GID	|
423		IB_SA_MCMEMBER_REC_PKEY		|
424		IB_SA_MCMEMBER_REC_JOIN_STATE;
425
426	if (create) {
427		comp_mask |=
428			IB_SA_MCMEMBER_REC_QKEY			|
429			IB_SA_MCMEMBER_REC_MTU_SELECTOR		|
430			IB_SA_MCMEMBER_REC_MTU			|
431			IB_SA_MCMEMBER_REC_TRAFFIC_CLASS	|
432			IB_SA_MCMEMBER_REC_RATE_SELECTOR	|
433			IB_SA_MCMEMBER_REC_RATE			|
434			IB_SA_MCMEMBER_REC_SL			|
435			IB_SA_MCMEMBER_REC_FLOW_LABEL		|
436			IB_SA_MCMEMBER_REC_HOP_LIMIT;
437
438		rec.qkey	  = priv->broadcast->mcmember.qkey;
439		rec.mtu_selector  = IB_SA_EQ;
440		rec.mtu		  = priv->broadcast->mcmember.mtu;
441		rec.traffic_class = priv->broadcast->mcmember.traffic_class;
442		rec.rate_selector = IB_SA_EQ;
443		rec.rate	  = priv->broadcast->mcmember.rate;
444		rec.sl		  = priv->broadcast->mcmember.sl;
445		rec.flow_label	  = priv->broadcast->mcmember.flow_label;
446		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
447	}
448
449	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
450	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
451					 &rec, comp_mask, GFP_KERNEL,
452					 ipoib_mcast_join_complete, mcast);
453	if (IS_ERR(mcast->mc)) {
454		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
455		ret = PTR_ERR(mcast->mc);
456		ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
457
458		mcast->backoff *= 2;
459		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
460			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
461
462		mutex_lock(&mcast_mutex);
463		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
464			queue_delayed_work(ipoib_workqueue,
465					   &priv->mcast_task,
466					   mcast->backoff * HZ);
467		mutex_unlock(&mcast_mutex);
468	}
469}
470
471void ipoib_mcast_join_task(struct work_struct *work)
472{
473	struct ipoib_dev_priv *priv =
474		container_of(work, struct ipoib_dev_priv, mcast_task.work);
475	if_t dev = priv->dev;
476	struct ib_port_attr attr;
477
478	ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);
479
480	if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
481		return;
482
483	if (ib_query_port(priv->ca, priv->port, &attr) ||
484            attr.state != IB_PORT_ACTIVE) {
485		ipoib_dbg(priv, "%s: port state is not ACTIVE (state = %d) suspend task.\n",
486                          __func__, attr.state);
487		return;
488	}
489
490	if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
491		ipoib_warn(priv, "ib_query_gid() failed\n");
492	else
493		memcpy(if_getlladdr(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
494
495	{
496		struct ib_port_attr attr;
497
498		if (!ib_query_port(priv->ca, priv->port, &attr))
499			priv->local_lid = attr.lid;
500		else
501			ipoib_warn(priv, "ib_query_port failed\n");
502	}
503
504	if (!priv->broadcast) {
505		struct ipoib_mcast *broadcast;
506
507		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
508			return;
509
510		broadcast = ipoib_mcast_alloc(priv, 1);
511		if (!broadcast) {
512			ipoib_warn(priv, "failed to allocate broadcast group\n");
513			mutex_lock(&mcast_mutex);
514			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
515				queue_delayed_work(ipoib_workqueue,
516						   &priv->mcast_task, HZ);
517			mutex_unlock(&mcast_mutex);
518			return;
519		}
520
521		spin_lock_irq(&priv->lock);
522		memcpy(broadcast->mcmember.mgid.raw, if_getbroadcastaddr(dev) + 4,
523		       sizeof (union ib_gid));
524		priv->broadcast = broadcast;
525
526		__ipoib_mcast_add(priv, priv->broadcast);
527		spin_unlock_irq(&priv->lock);
528	}
529
530	if (priv->broadcast &&
531	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
532		if (priv->broadcast &&
533		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
534			ipoib_mcast_join(priv, priv->broadcast, 0);
535		return;
536	}
537
538	while (1) {
539		struct ipoib_mcast *mcast = NULL;
540
541		spin_lock_irq(&priv->lock);
542		list_for_each_entry(mcast, &priv->multicast_list, list) {
543			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
544			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
545			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
546				/* Found the next unjoined group */
547				break;
548			}
549		}
550		spin_unlock_irq(&priv->lock);
551
552		if (&mcast->list == &priv->multicast_list) {
553			/* All done */
554			break;
555		}
556
557		ipoib_mcast_join(priv, mcast, 1);
558		return;
559	}
560
561	spin_lock_irq(&priv->lock);
562	if (priv->broadcast)
563		priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
564	else
565		priv->mcast_mtu = priv->admin_mtu;
566	spin_unlock_irq(&priv->lock);
567
568	if (!ipoib_cm_admin_enabled(priv))
569		ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu),
570		    true);
571
572	ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
573
574	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
575}
576
577int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv)
578{
579	ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n",
580	    priv->flags);
581
582	mutex_lock(&mcast_mutex);
583	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
584		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
585	mutex_unlock(&mcast_mutex);
586
587	return 0;
588}
589
590int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush)
591{
592
593	ipoib_dbg_mcast(priv, "stopping multicast thread\n");
594
595	mutex_lock(&mcast_mutex);
596	clear_bit(IPOIB_MCAST_RUN, &priv->flags);
597	cancel_delayed_work(&priv->mcast_task);
598	mutex_unlock(&mcast_mutex);
599
600	if (flush)
601		flush_workqueue(ipoib_workqueue);
602
603	return 0;
604}
605
606static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast)
607{
608	int ret = 0;
609
610	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
611		ib_sa_free_multicast(mcast->mc);
612
613	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
614		ipoib_dbg_mcast(priv, "leaving MGID %16D\n",
615				mcast->mcmember.mgid.raw, ":");
616
617		/* Remove ourselves from the multicast group */
618		ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
619				      be16_to_cpu(mcast->mcmember.mlid));
620		if (ret)
621			ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
622	}
623
624	return 0;
625}
626
627void
628ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb)
629{
630	if_t dev = priv->dev;
631	struct ipoib_mcast *mcast;
632
633	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)		||
634	    !priv->broadcast					||
635	    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
636		if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
637		m_freem(mb);
638		return;
639	}
640
641	mcast = __ipoib_mcast_find(priv, mgid);
642	if (!mcast) {
643		/* Let's create a new send only group now */
644		ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n",
645				mgid, ":");
646
647		mcast = ipoib_mcast_alloc(priv, 0);
648		if (!mcast) {
649			ipoib_warn(priv, "unable to allocate memory for "
650				   "multicast structure\n");
651			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
652			m_freem(mb);
653			goto out;
654		}
655
656		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
657		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
658		__ipoib_mcast_add(priv, mcast);
659		list_add_tail(&mcast->list, &priv->multicast_list);
660	}
661
662	if (!mcast->ah) {
663		if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) {
664			_IF_ENQUEUE(&mcast->pkt_queue, mb);
665		} else {
666			if_inc_counter(dev, IFCOUNTER_OERRORS, 1);
667			m_freem(mb);
668		}
669
670		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
671			ipoib_dbg_mcast(priv, "no address vector, "
672					"but multicast join already started\n");
673		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
674			ipoib_mcast_sendonly_join(mcast);
675
676		/*
677		 * If lookup completes between here and out:, don't
678		 * want to send packet twice.
679		 */
680		mcast = NULL;
681	}
682
683out:
684	if (mcast && mcast->ah)
685		ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN);
686}
687
688void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv)
689{
690	LIST_HEAD(remove_list);
691	struct ipoib_mcast *mcast, *tmcast;
692	unsigned long flags;
693
694	ipoib_dbg_mcast(priv, "flushing multicast list\n");
695
696	spin_lock_irqsave(&priv->lock, flags);
697
698	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
699		list_del(&mcast->list);
700		rb_erase(&mcast->rb_node, &priv->multicast_tree);
701		list_add_tail(&mcast->list, &remove_list);
702	}
703
704	if (priv->broadcast) {
705		rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
706		list_add_tail(&priv->broadcast->list, &remove_list);
707		priv->broadcast = NULL;
708	}
709
710	spin_unlock_irqrestore(&priv->lock, flags);
711
712	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
713		ipoib_mcast_leave(priv, mcast);
714		ipoib_mcast_free(mcast);
715	}
716}
717
718static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
719				     const u8 *broadcast)
720{
721	if (addrlen != INFINIBAND_ALEN)
722		return 0;
723	/* reserved QPN, prefix, scope */
724	if (memcmp(addr, broadcast, 6))
725		return 0;
726	/* signature lower, pkey */
727	if (memcmp(addr + 7, broadcast + 7, 3))
728		return 0;
729	return 1;
730}
731
732void ipoib_mcast_restart_task(struct work_struct *work)
733{
734	struct ipoib_dev_priv *priv =
735		container_of(work, struct ipoib_dev_priv, restart_task);
736	ipoib_mcast_restart(priv);
737}
738
739struct ipoib_mcast_ctx {
740	struct ipoib_dev_priv *priv;
741	struct list_head remove_list;
742};
743
744static u_int
745ipoib_process_maddr(void *arg, struct sockaddr_dl *sdl, u_int cnt)
746{
747	struct ipoib_mcast_ctx *ctx = arg;
748	struct ipoib_dev_priv *priv = ctx->priv;
749	struct ipoib_mcast *mcast;
750	struct ib_sa_mcmember_rec rec;
751	union ib_gid mgid;
752	uint8_t *addr;
753	int addrlen;
754
755	addr = LLADDR(sdl);
756	addrlen = sdl->sdl_alen;
757	if (!ipoib_mcast_addr_is_valid(addr, addrlen,
758	    if_getbroadcastaddr(priv->dev)))
759		return (0);
760
761	memcpy(mgid.raw, addr + 4, sizeof mgid);
762
763	mcast = __ipoib_mcast_find(priv, &mgid);
764	if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
765		struct ipoib_mcast *nmcast;
766
767		/* ignore group which is directly joined by userspace */
768		if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
769		    !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
770			ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n",
771					mgid.raw, ":");
772			return (0);
773		}
774
775		/* Not found or send-only group, let's add a new entry */
776		ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n",
777				mgid.raw, ":");
778
779		nmcast = ipoib_mcast_alloc(priv, 0);
780		if (!nmcast) {
781			ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
782			return (0);
783		}
784
785		set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);
786
787		nmcast->mcmember.mgid = mgid;
788
789		if (mcast) {
790			/* Destroy the send only entry */
791			list_move_tail(&mcast->list, &ctx->remove_list);
792
793			rb_replace_node(&mcast->rb_node,
794					&nmcast->rb_node,
795					&priv->multicast_tree);
796		} else
797			__ipoib_mcast_add(priv, nmcast);
798
799		list_add_tail(&nmcast->list, &priv->multicast_list);
800	}
801
802	if (mcast)
803		set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
804
805	return (1);
806}
807
808void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
809{
810	struct ipoib_mcast_ctx ctx = { priv,
811	    { &ctx.remove_list, &ctx.remove_list }};
812	if_t dev = priv->dev;
813	struct ipoib_mcast *mcast, *tmcast;
814
815	ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n",
816	    priv->flags);
817
818	ipoib_mcast_stop_thread(priv, 0);
819
820	spin_lock(&priv->lock);
821
822	/*
823	 * Unfortunately, the networking core only gives us a list of all of
824	 * the multicast hardware addresses. We need to figure out which ones
825	 * are new and which ones have been removed
826	 */
827
828	/* Clear out the found flag */
829	list_for_each_entry(mcast, &priv->multicast_list, list)
830		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
831
832	/* Mark all of the entries that are found or don't exist */
833	ctx.priv = priv;
834	if_foreach_llmaddr(dev, ipoib_process_maddr, &ctx);
835
836	/* Remove all of the entries don't exist anymore */
837	list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
838		if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
839		    !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
840			ipoib_dbg_mcast(priv, "deleting multicast group %16D\n",
841					mcast->mcmember.mgid.raw, ":");
842
843			rb_erase(&mcast->rb_node, &priv->multicast_tree);
844
845			/* Move to the remove list */
846			list_move_tail(&mcast->list, &ctx.remove_list);
847		}
848	}
849
850	spin_unlock(&priv->lock);
851
852	/* We have to cancel outside of the spinlock */
853	list_for_each_entry_safe(mcast, tmcast, &ctx.remove_list, list) {
854		ipoib_mcast_leave(mcast->priv, mcast);
855		ipoib_mcast_free(mcast);
856	}
857
858	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
859		ipoib_mcast_start_thread(priv);
860}
861
862#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
863
864struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv)
865{
866	struct ipoib_mcast_iter *iter;
867
868	iter = kmalloc(sizeof *iter, GFP_KERNEL);
869	if (!iter)
870		return NULL;
871
872	iter->priv = priv;
873	memset(iter->mgid.raw, 0, 16);
874
875	if (ipoib_mcast_iter_next(iter)) {
876		kfree(iter);
877		return NULL;
878	}
879
880	return iter;
881}
882
883int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
884{
885	struct ipoib_dev_priv *priv = iter->priv;
886	struct rb_node *n;
887	struct ipoib_mcast *mcast;
888	int ret = 1;
889
890	spin_lock_irq(&priv->lock);
891
892	n = rb_first(&priv->multicast_tree);
893
894	while (n) {
895		mcast = rb_entry(n, struct ipoib_mcast, rb_node);
896
897		if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
898			   sizeof (union ib_gid)) < 0) {
899			iter->mgid      = mcast->mcmember.mgid;
900			iter->created   = mcast->created;
901			iter->queuelen  = mcast->pkt_queue.ifq_len;
902			iter->complete  = !!mcast->ah;
903			iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
904
905			ret = 0;
906
907			break;
908		}
909
910		n = rb_next(n);
911	}
912
913	spin_unlock_irq(&priv->lock);
914
915	return ret;
916}
917
918void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
919			   union ib_gid *mgid,
920			   unsigned long *created,
921			   unsigned int *queuelen,
922			   unsigned int *complete,
923			   unsigned int *send_only)
924{
925	*mgid      = iter->mgid;
926	*created   = iter->created;
927	*queuelen  = iter->queuelen;
928	*complete  = iter->complete;
929	*send_only = iter->send_only;
930}
931
932#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
933